Commit ed444aab, authored Jul 29, 2018 by Yuxin Wu
Update ShuffleNet with different configs

Parent: 7cb2606c
Showing 5 changed files with 72 additions and 56 deletions (+72 −56). The diffs below are shown inline, ignoring whitespace-only changes.
examples/DoReFa-Net/README.md          +10 −9
examples/ImageNetModels/README.md       +3 −4
examples/ImageNetModels/shufflenet.py  +57 −41
tensorpack/models/batch_norm.py         +1 −1
tensorpack/tfutils/gradproc.py          +1 −1
examples/DoReFa-Net/README.md

@@ -14,13 +14,14 @@ This is a good set of baselines for research in model quantization.
 These quantization techniques, when applied on AlexNet, achieves the following ImageNet performance in this implementation:

 | Model | Bit Width <br/> (weights, activations, gradients) | Top 1 Validation Error <sup>[1](#ft1)</sup> |
-|:----------------------------------:|:-------------------------------------------------:|:-----------------------------------------------------------------------------:|
+|:----------------------------------:|:-------------------------------------------------:|:-------------------------------------------------------------------------------:|
 | Full Precision<sup>[2](#ft2)</sup> | 32,32,32 | 40.3% |
 | TTQ | t,32,32 | 42.0% |
 | BWN | 1,32,32 | 44.6% |
 | BNN | 1,1,32 | 51.9% |
 | DoReFa | 8,8,8 | 42.0% [:arrow_down:](http://models.tensorpack.com/DoReFa-Net/AlexNet-8,8,8.npz) |
 | DoReFa | 1,2,32 | 46.6% |
-| DoReFa | 1,2,6 | 46.8% [:arrow_down:](http://models.tensorpack.com/DoReFa-Net/alexnet-126.npz) |
+| DoReFa | 1,2,6 | 46.8% [:arrow_down:](http://models.tensorpack.com/DoReFa-Net/AlexNet-1,2,6.npz) |
 | DoReFa | 1,2,4 | 54.0% |

 <a id="ft1">1</a>: These numbers were obtained by training on 8 GPUs with a total batch size of 256.
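In the table above, each bit-width triple lists how many bits the weights, activations, and gradients are quantized to (`t` denotes TTQ's ternary weights; 32 means full precision). For orientation, the uniform k-bit quantizer at the core of DoReFa maps a value in [0, 1] onto 2^k evenly spaced levels; a minimal NumPy sketch for reference (not a line from this commit):

import numpy as np

def quantize_k(x, k):
    # Uniform k-bit quantizer: round x in [0, 1] to one of 2^k levels.
    n = float(2 ** k - 1)
    return np.round(x * n) / n

x = np.array([0.00, 0.33, 0.50, 0.90])
print(quantize_k(x, 2))  # -> [0.  0.33333333  0.66666667  1.]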
examples/ImageNetModels/README.md

@@ -10,8 +10,7 @@ Pretrained models can be downloaded at [tensorpack model zoo](http://models.tens
 Reproduce [ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices](https://arxiv.org/abs/1707.01083)
 on ImageNet.

-This is a 38Mflops ShuffleNet, corresponding to `ShuffleNet 0.5x g=3` in __the
-2nd arxiv version__ of the paper.
+This is a 38Mflops ShuffleNet, corresponding to `ShuffleNet 0.5x g=3` in the paper.

 After 240 epochs (36 hours on 8 P100s) it reaches top-1 error of 42.32%,
 matching the paper's number.
examples/ImageNetModels/shufflenet.py

@@ -4,6 +4,7 @@
 import argparse
 import numpy as np
+import math
 import os
 import cv2

@@ -15,6 +16,7 @@ from tensorpack.dataflow import imgaug
 from tensorpack.tfutils import argscope, get_model_loader, model_utils
+from tensorpack.tfutils.scope_utils import under_name_scope
 from tensorpack.utils.gpu import get_num_gpu
 from tensorpack.utils import logger

 from imagenet_utils import (
     get_imagenet_dataflow,
@@ -52,29 +54,24 @@ def channel_shuffle(l, group):
     return l


-def BN(x, name=None):
-    return BatchNorm('bn', x)
-
-
-class Model(ImageNetModel):
-    weight_decay = 4e-5
-
-    def get_logits(self, image):
-        def shufflenet_unit(l, out_channel, group, stride):
+@layer_register()
+def shufflenet_unit(l, out_channel, group, stride):
     in_shape = l.get_shape().as_list()
     in_channel = in_shape[1]
     shortcut = l

-    # We do not apply group convolution on the first pointwise layer
-    # because the number of input channels is relatively small.
-    first_split = group if in_channel != 12 else 1
+    # "We do not apply group convolution on the first pointwise layer
+    #  because the number of input channels is relatively small."
+    first_split = group if in_channel > 24 else 1
     l = Conv2D('conv1', l, out_channel // 4, 1, split=first_split, activation=BNReLU)
     l = channel_shuffle(l, group)
-    l = DepthConv('dconv', l, out_channel // 4, 3, activation=BN, stride=stride)
+    l = DepthConv('dconv', l, out_channel // 4, 3, stride=stride)
+    l = BatchNorm('dconv_bn', l)
     l = Conv2D('conv2', l,
                out_channel if stride == 1 else out_channel - in_channel,
-               1, split=group, activation=BN)
+               1, split=group)
+    l = BatchNorm('conv2_bn', l)
     if stride == 1:     # unit (b)
         output = tf.nn.relu(shortcut + l)
     else:   # unit (c)
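The unit above relies on `channel_shuffle` (its body sits just above this hunk) to mix information across the groups of the group convolutions. The paper describes it as a reshape–transpose–flatten of the channel axis; a minimal NumPy sketch of that operation (illustrative only, not tensorpack's code):

import numpy as np

def channel_shuffle_ref(x, group):
    # x: an NCHW array whose channel count is divisible by `group`.
    n, c, h, w = x.shape
    assert c % group == 0
    x = x.reshape(n, group, c // group, h, w)
    x = x.transpose(0, 2, 1, 3, 4)  # swap the group and per-group channel axes
    return x.reshape(n, c, h, w)

x = np.arange(6, dtype=np.float32).reshape(1, 6, 1, 1)
print(channel_shuffle_ref(x, 3).ravel())  # [0. 2. 4. 1. 3. 5.]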
@@ -82,28 +79,44 @@ class Model(ImageNetModel):
         output = tf.concat([shortcut, tf.nn.relu(l)], axis=1)
     return output


+@layer_register(log_shape=True)
+def shufflenet_stage(input, channel, num_blocks, group):
+    l = input
+    for i in range(num_blocks):
+        name = 'block{}'.format(i)
+        l = shufflenet_unit(name, l, channel, group, 2 if i == 0 else 1)
+    return l
+
+
+class Model(ImageNetModel):
+    weight_decay = 4e-5
+
+    def get_logits(self, image):
         with argscope([Conv2D, MaxPooling, AvgPooling, GlobalAvgPooling, BatchNorm],
                       data_format=self.data_format), \
                 argscope(Conv2D, use_bias=False):
-            group = 3
-            channels = [120, 240, 480]
-            l = Conv2D('conv1', image, 12, 3, strides=2, activation=BNReLU)
+            # See Table 1 & 2 in https://arxiv.org/abs/1707.01083
+            group = args.group
+            channels = {
+                3: [240, 480, 960],
+                4: [272, 544, 1088],
+                8: [384, 768, 1536]
+            }
+            mul = group * 4  # #chan has to be a multiple of this number
+            channels = [int(math.ceil(x * args.ratio / mul) * mul)
+                        for x in channels[group]]
+            # The first channel must be a multiple of group
+            first_chan = int(math.ceil(24 * args.ratio / group) * group)
+            logger.info("#Channels: " + str([first_chan] + channels))
+
+            l = Conv2D('conv1', image, first_chan, 3, strides=2, activation=BNReLU)
             l = MaxPooling('pool1', l, 3, 2, padding='SAME')

-            with tf.variable_scope('group1'):
-                for i in range(4):
-                    with tf.variable_scope('block{}'.format(i)):
-                        l = shufflenet_unit(l, channels[0], group, 2 if i == 0 else 1)
-            with tf.variable_scope('group2'):
-                for i in range(8):
-                    with tf.variable_scope('block{}'.format(i)):
-                        l = shufflenet_unit(l, channels[1], group, 2 if i == 0 else 1)
+            l = shufflenet_stage('group1', l, channels[0], 4, group)
+            l = shufflenet_stage('group2', l, channels[1], 8, group)
+            l = shufflenet_stage('group3', l, channels[2], 4, group)
-            with tf.variable_scope('group3'):
-                for i in range(4):
-                    with tf.variable_scope('block{}'.format(i)):
-                        l = shufflenet_unit(l, channels[2], group, 2 if i == 0 else 1)

             l = GlobalAvgPooling('gap', l)
             logits = FullyConnected('linear', l, 1000)
         return logits
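The rounding above exists because each unit's bottleneck uses `out_channel // 4` channels split across `group` group convolutions, so every stage width must be a multiple of `group * 4`, and the stem width a multiple of `group`. A quick worked check of the same arithmetic, assuming the script's defaults ratio=0.5 and group=3:

import math

group, ratio = 3, 0.5  # default configuration
base = {3: [240, 480, 960], 4: [272, 544, 1088], 8: [384, 768, 1536]}[group]
mul = group * 4
channels = [int(math.ceil(x * ratio / mul) * mul) for x in base]
first_chan = int(math.ceil(24 * ratio / group) * group)
print(first_chan, channels)  # 12 [120, 240, 480] -- exactly the old hard-coded config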
@@ -179,6 +192,8 @@ if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.')
     parser.add_argument('--data', help='ILSVRC dataset dir')
+    parser.add_argument('--ratio', type=float, default=0.5, choices=[1., 0.5, 0.25])
+    parser.add_argument('--group', type=int, default=3, choices=[3, 4, 8])
     parser.add_argument('--load', help='load model')
     parser.add_argument('--eval', action='store_true')
     parser.add_argument('--flops', action='store_true', help='print flops and exit')
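These two new flags select which paper configuration to build; for instance, a hypothetical invocation like `./shufflenet.py --data /path/to/ilsvrc --gpu 0,1,2,3 --ratio 1.0 --group 8` would train the `ShuffleNet 1x g=8` variant, and the log directory in the next hunk encodes the same pair (`shufflenet-1.0x-g=8`).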
@@ -210,7 +225,8 @@ if __name__ == '__main__':
             cmd='op', options=tf.profiler.ProfileOptionBuilder.float_operation())
     else:
-        logger.set_logger_dir(os.path.join('train_log', 'shufflenet'))
+        logger.set_logger_dir(os.path.join(
+            'train_log', 'shufflenet-{}x-g={}'.format(args.ratio, args.group)))

         nr_tower = max(get_num_gpu(), 1)
         config = get_config(model, nr_tower)
tensorpack/models/batch_norm.py

@@ -92,7 +92,7 @@ def BatchNorm(inputs, axis=None, training=None, momentum=0.9, epsilon=1e-5,
             They are very similar in speed, but `internal_update=True` can be used
             when you have conditionals in your model, or when you have multiple networks to train.
             Corresponding TF issue: https://github.com/tensorflow/tensorflow/issues/14699

-        sync_statistics (str or None): one of None "nccl", or "horovod".
+        sync_statistics (str or None): one of None, "nccl", or "horovod".

             By default (None), it uses statistics of the input tensor to normalize.
             This is the standard way BatchNorm was done in most frameworks.
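Given the signature in the hunk header and the registered-layer calling convention used elsewhere in this commit, enabling cross-GPU statistics would look something like `l = BatchNorm('bn', l, sync_statistics='nccl')` — a hedged sketch, not a line from this commit.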
tensorpack/tfutils/gradproc.py

@@ -251,7 +251,7 @@ class ScaleGradient(MapGradient):
         if re.match(regex, varname):
             if self._verbose:
-                logger.info("Apply lr multiplier {} for {}".format(val, varname))
+                logger.info("Gradient of '{}' is multipled by {}".format(varname, val))

             if val != 0:    # skip zero to speed up
                 return grad * val
             else:
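For context: `ScaleGradient` is constructed with (regex, multiplier) pairs, so something like `ScaleGradient(('conv0/.*', 0.1))` (a hypothetical pattern) would scale the gradients of matching variables by 0.1, while a multiplier of 0 skips the variable entirely rather than multiplying by zero (the `val != 0` fast path above).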