Shashank Suhas / seminar-breakout / Commits

Commit 3e9f164d
authored Feb 23, 2018 by Yuxin Wu

    Upgrade shufflenet; fix paramsetter for restore

parent 9185744d

Showing 6 changed files with 70 additions and 35 deletions (+70 -35)
examples/DeepQNetwork/DQN.py        +1  -1
examples/DeepQNetwork/DQNModel.py   +3  -1
examples/FasterRCNN/model.py        +32 -0
examples/ShuffleNet/README.md       +5  -5
examples/ShuffleNet/shufflenet.py   +16 -12
tensorpack/callbacks/param.py       +13 -16
examples/DeepQNetwork/DQN.py  (view file @ 3e9f164d)

@@ -54,7 +54,7 @@ class Model(DQNModel):
     def _get_DQN_prediction(self, image):
         """ image: [0,255]"""
         image = image / 255.0
-        with argscope(Conv2D, nl=PReLU.symbolic_function, use_bias=True):
+        with argscope(Conv2D, activation=lambda x: PReLU('prelu', x), use_bias=True):
             l = (LinearWrap(image)
                  # Nature architecture
                  .Conv2D('conv0', out_channel=32, kernel_shape=8, stride=4)
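This change swaps the older `nl=` hook for the `activation=` keyword, which takes any callable. A hedged usage sketch (hypothetical input, assuming the tensorpack `argscope`/`Conv2D` API of this era) of how the scoped default applies:

```python
# Hedged sketch, not from the commit: every Conv2D created inside the
# argscope inherits activation=... and use_bias=True as defaults.
import tensorflow as tf
from tensorpack import argscope
from tensorpack.models import Conv2D, PReLU

image = tf.placeholder(tf.float32, [None, 84, 84, 4])  # hypothetical input
with argscope(Conv2D, activation=lambda x: PReLU('prelu', x), use_bias=True):
    l = Conv2D('conv0', image, 32, 8, stride=4)   # PReLU applied by default
    l = Conv2D('conv1', l, 64, 4, stride=2)       # same scoped default
```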
examples/DeepQNetwork/DQNModel.py  (view file @ 3e9f164d)

@@ -15,6 +15,8 @@ assert tensorpack.tfutils.common.get_tf_version_number() >= 1.2

 class Model(ModelDesc):
+    learning_rate = 1e-3
+
     def __init__(self, image_shape, channel, method, num_actions, gamma):
         self.image_shape = image_shape
         self.channel = channel

@@ -80,7 +82,7 @@ class Model(ModelDesc):
         summary.add_moving_summary(self.cost)

     def _get_optimizer(self):
-        lr = tf.get_variable('learning_rate', initializer=1e-3, trainable=False)
+        lr = tf.get_variable('learning_rate', initializer=self.learning_rate, trainable=False)
         opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
         return optimizer.apply_grad_processors(
             opt, [gradproc.GlobalNormClip(10), gradproc.SummaryGradient()])
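Hoisting the hard-coded initializer into a class attribute means subclasses can change the default learning rate without overriding `_get_optimizer`. A hypothetical sketch of the pattern this enables:

```python
# Hypothetical subclass, not part of the commit: the optimizer now reads
# self.learning_rate, so overriding the class attribute is sufficient.
class MyDQNModel(Model):
    learning_rate = 5e-4   # used as initializer of the 'learning_rate' variable
```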
examples/FasterRCNN/model.py  (view file @ 3e9f164d)

@@ -546,3 +546,35 @@ def maskrcnn_loss(mask_logits, fg_labels, fg_target_masks):
     add_moving_summary(loss, accuracy, fg_pixel_ratio, pos_accuracy)
     return loss
+
+
+if __name__ == '__main__':
+    """
+    Demonstrate what's wrong with tf.image.crop_and_resize:
+    """
+    import numpy as np
+    import tensorflow.contrib.eager as tfe
+    tfe.enable_eager_execution()
+
+    # want to crop 2x2 out of a 5x5 image, and resize to 4x4
+    image = np.arange(25).astype('float32').reshape(5, 5)
+    boxes = np.asarray([[1, 1, 3, 3]], dtype='float32')
+    target = 4
+
+    print(crop_and_resize(image[None, None, :, :], boxes, [0], target)[0][0])
+    """
+    Expected values:
+    4.5 5 5.5 6
+    7 7.5 8 8.5
+    9.5 10 10.5 11
+    12 12.5 13 13.5
+
+    Our implementation is not perfect either. When boxes are on the border of
+    images, TF pads zeros instead of border values. But this rarely happens so it's fine.
+
+    You cannot easily get the above results with tf.image.crop_and_resize.
+    Try out yourself here:
+    """
+    print(tf.image.crop_and_resize(
+        image[None, :, :, None],
+        np.asarray([[1, 1, 2, 2]]) / 4.0, [0], [target, target])[0][:, :, 0])
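The "expected values" in that docstring follow from half-pixel-aligned bilinear sampling: with pixel centers at integer coordinates, output sample k of the box [1, 1, 3, 3] lands at 0.5 + (k + 0.5) * 2 / 4 along each axis. A hedged NumPy check (my reading of the alignment convention, not code from the repo):

```python
# Reproduce the expected 4x4 crop by hand with bilinear interpolation.
import numpy as np

image = np.arange(25, dtype='float32').reshape(5, 5)   # pixel (r, c) = 5r + c
coords = 0.5 + (np.arange(4) + 0.5) * (3 - 1) / 4.0    # 0.75, 1.25, 1.75, 2.25

def bilinear(img, r, c):
    r0, c0 = int(r), int(c)                            # floor, since r, c >= 0
    fr, fc = r - r0, c - c0
    return ((1 - fr) * (1 - fc) * img[r0, c0] + (1 - fr) * fc * img[r0, c0 + 1] +
            fr * (1 - fc) * img[r0 + 1, c0] + fr * fc * img[r0 + 1, c0 + 1])

out = np.array([[bilinear(image, r, c) for c in coords] for r in coords])
print(out)   # [[4.5 5. 5.5 6.] [7. 7.5 8. 8.5] ...] matches the docstring
```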
examples/ShuffleNet/README.md  (view file @ 3e9f164d)

@@ -4,8 +4,8 @@
 Reproduce [ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices](https://arxiv.org/abs/1707.01083)
 on ImageNet.

-This is a 40Mflops ShuffleNet, corresponding to `ShuffleNet 0.5x (arch2) g=8` in the paper.
-After 100 epochs it reaches top-1 error of 42.62, matching the paper's number.
+This is a 38Mflops ShuffleNet, corresponding to `ShuffleNet 0.5x g=3` in [version 2](https://arxiv.org/pdf/1707.01083v2) of the paper.
+After 240 epochs it reaches top-1 error of 42.32, better than the paper's number.

 ### Usage:

@@ -13,14 +13,14 @@ Print flops with tensorflow:
 ```bash
 ./shufflenet.py --flops
 ```
-It will print about 80 Mflops, because the paper counts multiply+add as 1 flop.
+It will print about 75 Mflops, because the paper counts multiply+add as 1 flop.

-Train (takes 24 hours on 8 Maxwell TitanX):
+Train (takes 36 hours on 8 P100s):
 ```bash
 ./shufflenet.py --data /path/to/ilsvrc/
 ```

-Eval the [pretrained model](http://models.tensorpack.com/ShuffleNet/):
+Evaluate the [pretrained model](http://models.tensorpack.com/ShuffleNet/):
 ```
 ./shufflenet.py --eval --data /path/to/ilsvrc --load /path/to/model
 ```
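Why the printed count is roughly double the paper's figure: TensorFlow's profiler counts a multiply and an add as two separate operations, while the paper counts one multiply-add as a single flop, so ~75 Mflops printed corresponds to the ~38 Mflops quoted above. A sketch of the profiler call presumably behind `--flops` (mirroring the hunk near the end of shufflenet.py below):

```python
# Flops counting with the TF 1.x profiler; building the model graph
# beforehand is elided here.
import tensorflow as tf

tf.profiler.profile(
    tf.get_default_graph(),
    cmd='op',
    options=tf.profiler.ProfileOptionBuilder.float_operation())
```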
examples/ShuffleNet/shufflenet.py  (view file @ 3e9f164d)

@@ -23,7 +23,7 @@ from imagenet_utils import (
     get_imagenet_dataflow,
     ImageNetModel, GoogleNetResize, eval_on_ILSVRC12)

-TOTAL_BATCH_SIZE = 256
+TOTAL_BATCH_SIZE = 1024


 @layer_register(log_shape=True)

@@ -48,6 +48,7 @@ def DepthConv(x, out_channel, kernel_shape, padding='SAME', stride=1,
 def channel_shuffle(l, group):
     in_shape = l.get_shape().as_list()
     in_channel = in_shape[1]
+    assert in_channel % group == 0, in_channel
     l = tf.reshape(l, [-1, group, in_channel // group] + in_shape[-2:])
     l = tf.transpose(l, [0, 2, 1, 3, 4])
     l = tf.reshape(l, [-1, in_channel] + in_shape[-2:])
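The reshape-transpose-reshape above is the whole channel-shuffle trick: split the channels into `group` blocks, then interleave the blocks. A hedged NumPy rendering (NCHW layout, matching the diff) to see the interleaving concretely:

```python
# Hedged NumPy rendering of channel_shuffle, not from the repo.
import numpy as np

def channel_shuffle_np(x, group):
    n, c, h, w = x.shape
    assert c % group == 0, c                 # the assert added by this commit
    x = x.reshape(n, group, c // group, h, w)
    x = x.transpose(0, 2, 1, 3, 4)           # swap the group and per-group axes
    return x.reshape(n, c, h, w)

x = np.arange(6).reshape(1, 6, 1, 1)         # channels 0..5
print(channel_shuffle_np(x, 3).reshape(6))   # [0 2 4 1 3 5]
```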
@@ -69,7 +70,7 @@ class Model(ImageNetModel):
         # We do not apply group convolution on the first pointwise layer
         # because the number of input channels is relatively small.
-        first_split = group if in_channel != 16 else 1
+        first_split = group if in_channel != 12 else 1
         l = Conv2D('conv1', l, out_channel // 4, 1, split=first_split, nl=BNReLU)
         l = channel_shuffle(l, group)
         l = DepthConv('dconv', l, out_channel // 4, 3, nl=BN, stride=stride)

@@ -86,10 +87,10 @@ class Model(ImageNetModel):
         with argscope([Conv2D, MaxPooling, AvgPooling, GlobalAvgPooling, BatchNorm], data_format=self.data_format), \
                 argscope(Conv2D, use_bias=False):
-            group = 8
-            channels = [224, 416, 832]
+            group = 3
+            channels = [120, 240, 480]

-            l = Conv2D('conv1', image, 16, 3, stride=2, nl=BNReLU)
+            l = Conv2D('conv1', image, 12, 3, stride=2, nl=BNReLU)
             l = MaxPooling('pool1', l, 3, 2, padding='SAME')

             with tf.variable_scope('group1'):

@@ -98,7 +99,7 @@ class Model(ImageNetModel):
                     l = shufflenet_unit(l, channels[0], group, 2 if i == 0 else 1)

             with tf.variable_scope('group2'):
-                for i in range(6):
+                for i in range(8):
                     with tf.variable_scope('block{}'.format(i)):
                         l = shufflenet_unit(l, channels[1], group, 2 if i == 0 else 1)
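All of the g=8 constants move to the g=3 configuration in lockstep, and both the stage widths and the `out_channel // 4` bottleneck widths remain divisible by the group count, which is exactly what the new assert in `channel_shuffle` guards. A quick hedged sanity check (not from the commit):

```python
# Every stage width and every bottleneck width must split evenly into groups
# for channel_shuffle's reshape to be valid.
group, channels = 3, [120, 240, 480]
for c in channels:
    assert c % group == 0 and (c // 4) % group == 0, c
print("ok")   # 120/240/480 and the 30/60/120 bottlenecks all divide by 3
```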
@@ -148,11 +149,15 @@ def get_config(model, nr_tower):
     logger.info("Running on {} towers. Batch size per tower: {}".format(nr_tower, batch))

     dataset_train = get_data('train', batch)
     dataset_val = get_data('val', batch)
+
+    step_size = 1280000 // TOTAL_BATCH_SIZE
+    max_iter = 3 * 10**5
+    max_epoch = (max_iter // step_size) + 1
     callbacks = [
         ModelSaver(),
         ScheduledHyperParamSetter('learning_rate',
-                                  [(0, 3e-1), (30, 3e-2), (60, 3e-3), (90, 3e-4)]),
-        HumanHyperParamSetter('learning_rate'),
+                                  [(0, 0.5), (max_iter, 0)],
+                                  interp='linear', step_based=True),
     ]
     infs = [ClassificationError('wrong-top1', 'val-error-top1'),
             ClassificationError('wrong-top5', 'val-error-top5')]
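The schedule change replaces four step-wise epoch-based drops with a single linear ramp from 0.5 down to 0 over `max_iter` training steps. A hedged illustration of the interpolation it performs (same formula as the param.py hunk below):

```python
# Hedged sketch, not from the repo: interp='linear', step_based=True decays
# the learning rate linearly from 0.5 at step 0 to 0 at max_iter.
max_iter = 3 * 10**5
(s0, v0), (s1, v1) = (0, 0.5), (max_iter, 0.0)

def lr_at(step):
    # the same interpolation formula used in tensorpack/callbacks/param.py
    return (step - s0) * 1. / (s1 - s0) * (v1 - v0) + v0

print(lr_at(0), lr_at(150000), lr_at(300000))   # 0.5 0.25 0.0
```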
@@ -168,8 +173,8 @@ def get_config(model, nr_tower):
         model=model,
         dataflow=dataset_train,
         callbacks=callbacks,
-        steps_per_epoch=5000,
-        max_epoch=100,
+        steps_per_epoch=step_size,
+        max_epoch=max_epoch,
     )

@@ -207,8 +212,7 @@ if __name__ == '__main__':
             cmd='op',
             options=tf.profiler.ProfileOptionBuilder.float_operation())
     else:
-        logger.set_logger_dir(
-            os.path.join('train_log', 'shufflenet'))
+        logger.set_logger_dir(os.path.join('train_log', 'shufflenet'))

         nr_tower = max(get_nr_gpu(), 1)
         config = get_config(model, nr_tower)
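For reference, these constants also explain the "240 epochs" claimed in the README change above; a quick arithmetic check:

```python
TOTAL_BATCH_SIZE = 1024
step_size = 1280000 // TOTAL_BATCH_SIZE   # 1250 steps per epoch (1.28M images)
max_iter = 3 * 10**5
max_epoch = (max_iter // step_size) + 1   # 241, i.e. about 240 epochs
print(step_size, max_epoch)               # 1250 241
```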
tensorpack/callbacks/param.py  (view file @ 3e9f164d)

@@ -246,12 +246,6 @@ class ScheduledHyperParamSetter(HyperParamSetter):
     def _get_value_to_set(self):
         refnum = self.global_step if self._step else self.epoch_num
-        if self.interp is None:
-            for e, v in self.schedule:
-                if e == refnum:
-                    return v
-            return None
-        else:
-            laste, lastv = None, None
-            for e, v in self.schedule:
-                if e == refnum:
+        laste, lastv = None, None
+        for e, v in self.schedule:
+            if e == refnum:

@@ -262,7 +256,10 @@ class ScheduledHyperParamSetter(HyperParamSetter):
         if laste is None or laste == e:
             # hasn't reached the first scheduled point, or reached the end of all scheduled points
             return None
-        v = (refnum - laste) * 1. / (e - laste) * (v - lastv) + lastv
+        if self.interp is not None:
+            v = (refnum - laste) * 1. / (e - laste) * (v - lastv) + lastv
+        else:
+            v = lastv
         return v

     def _trigger_epoch(self):
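This is the "fix paramsetter for restore" half of the commit message. Previously, with `interp=None`, a value was returned only when the current epoch or step exactly equaled a schedule point, so a training run restored between two points never had its parameter re-applied. The unified loop now falls back to the most recent scheduled value. A hedged standalone rendering of the new logic (no tensorpack needed):

```python
# Standalone illustration of the fixed _get_value_to_set, not library code.
schedule = [(0, 3e-1), (30, 3e-2), (60, 3e-3)]

def value_at(refnum, interp=None):
    laste, lastv = None, None
    for e, v in schedule:
        if e == refnum:
            return v                        # exact boundary: return directly
        if e > refnum:
            break
        laste, lastv = e, v
    if laste is None or laste == e:
        return None                         # before first point, or past the last
    if interp is not None:
        return (refnum - laste) * 1. / (e - laste) * (v - lastv) + lastv
    return lastv

print(value_at(45))   # 0.03 -- the old code returned None when restoring at epoch 45
```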