Commit d44d32d6
authored Aug 06, 2017 by Yuxin Wu

organize name scopes in EMA & trainers (#340)

parent 5b310290

Showing 9 changed files with 88 additions and 63 deletions (+88 -63)
examples/GAN/BEGAN.py                    +10   -7
examples/GAN/GAN.py                      +20  -17
examples/cifar-convnet.py                 +1   -1
tensorpack/graph_builder/model_desc.py    +5   -2
tensorpack/models/regularize.py           +2   -3
tensorpack/tfutils/optimizer.py          +20  -19
tensorpack/tfutils/summary.py            +20   -5
tensorpack/train/multigpu.py              +9   -8
tensorpack/utils/argtools.py              +1   -1
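Most of the changes below follow one theme: graph-construction ops that used to be created at the top level are now created inside a tf.name_scope (or a cached variable scope), so related nodes collapse into one group in the TensorBoard graph. As context only (not part of this commit; a minimal TF 1.x sketch of what a name scope does to op names):

    import tensorflow as tf

    a = tf.constant(1.0, name='a')
    with tf.name_scope('optimize'):
        b = tf.add(a, 1.0, name='b')   # op is created as 'optimize/b'
    print(a.op.name)   # 'a'
    print(b.op.name)   # 'optimize/b'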
examples/GAN/BEGAN.py  (+10 -7)

@@ -104,22 +104,25 @@ class Model(GANModelDesc):
         with tf.variable_scope('dec'):
             recon_pos = self.decoder(hidden_pos)
             recon_neg = self.decoder(hidden_neg)
 
-        summary_image('generated-samples', image_gen)
-        summary_image('reconstruct-real', recon_pos)
-        summary_image('reconstruct-fake', recon_neg)
-        L_pos = tf.reduce_mean(tf.abs(recon_pos - image_pos), name='loss_pos')
-        L_neg = tf.reduce_mean(tf.abs(recon_neg - image_gen), name='loss_neg')
-        eq = tf.subtract(GAMMA * L_pos, L_neg, name='equilibrium')
-        measure = tf.add(L_pos, tf.abs(eq), name='measure')
+        with tf.name_scope('viz'):
+            summary_image('generated-samples', image_gen)
+            summary_image('reconstruct-real', recon_pos)
+            summary_image('reconstruct-fake', recon_neg)
+
+        with tf.name_scope('losses'):
+            L_pos = tf.reduce_mean(tf.abs(recon_pos - image_pos), name='loss_pos')
+            L_neg = tf.reduce_mean(tf.abs(recon_neg - image_gen), name='loss_neg')
+            eq = tf.subtract(GAMMA * L_pos, L_neg, name='equilibrium')
+            measure = tf.add(L_pos, tf.abs(eq), name='measure')
 
         kt = tf.get_variable('kt', dtype=tf.float32, initializer=0.0)
         update_kt = kt.assign_add(1e-3 * eq)
         with tf.control_dependencies([update_kt]):
             self.d_loss = tf.subtract(L_pos, kt * L_neg, name='loss_D')
             self.g_loss = L_neg
 
         add_moving_summary(L_pos, L_neg, eq, measure, self.d_loss)
         tf.summary.scalar('kt-summary', kt)
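Only op names move under the new 'viz' and 'losses' scopes; variables such as kt keep their names, because tf.name_scope does not affect tf.get_variable. A minimal TF 1.x sketch (editorial, not part of the commit):

    import tensorflow as tf

    with tf.name_scope('losses'):
        kt = tf.get_variable('kt', dtype=tf.float32, initializer=0.0)
        eq = tf.subtract(1.0, 2.0, name='equilibrium')
    print(kt.name)      # 'kt:0' -- tf.get_variable ignores the name scope
    print(eq.op.name)   # 'losses/equilibrium'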
examples/GAN/GAN.py  (+20 -17)

@@ -77,9 +77,10 @@ class GANTrainer(Trainer):
         opt = model.get_optimizer()
 
         # by default, run one d_min after one g_min
-        g_min = opt.minimize(model.g_loss, var_list=model.g_vars, name='g_op')
-        with tf.control_dependencies([g_min]):
-            d_min = opt.minimize(model.d_loss, var_list=model.d_vars, name='d_op')
+        with tf.name_scope('optimize'):
+            g_min = opt.minimize(model.g_loss, var_list=model.g_vars, name='g_op')
+            with tf.control_dependencies([g_min]):
+                d_min = opt.minimize(model.d_loss, var_list=model.d_vars, name='d_op')
         self.train_op = d_min
 
         super(GANTrainer, self).__init__(config)

@@ -106,10 +107,11 @@ class SeparateGANTrainer(Trainer):
         model.build_graph(input)
         opt = model.get_optimizer()
-        self.d_min = opt.minimize(
-            model.d_loss, var_list=model.d_vars, name='d_min')
-        self.g_min = opt.minimize(
-            model.g_loss, var_list=model.g_vars, name='g_min')
+        with tf.name_scope('optimize'):
+            self.d_min = opt.minimize(
+                model.d_loss, var_list=model.d_vars, name='d_min')
+            self.g_min = opt.minimize(
+                model.g_loss, var_list=model.g_vars, name='g_min')
 
         super(SeparateGANTrainer, self).__init__(config)

@@ -142,16 +144,17 @@ class MultiGPUGANTrainer(Trainer):
         cost_list = MultiGPUTrainerBase.build_on_multi_tower(
             config.tower, get_cost, devices)
         # simply average the cost. It might get faster to average the gradients
-        d_loss = tf.add_n([x[0] for x in cost_list]) * (1.0 / nr_gpu)
-        g_loss = tf.add_n([x[1] for x in cost_list]) * (1.0 / nr_gpu)
-
-        opt = model.get_optimizer()
-        # run one d_min after one g_min
-        g_min = opt.minimize(g_loss, var_list=model.g_vars,
-                             colocate_gradients_with_ops=True, name='g_op')
-        with tf.control_dependencies([g_min]):
-            d_min = opt.minimize(d_loss, var_list=model.d_vars,
-                                 colocate_gradients_with_ops=True, name='d_op')
+        with tf.name_scope('optimize'):
+            d_loss = tf.add_n([x[0] for x in cost_list]) * (1.0 / nr_gpu)
+            g_loss = tf.add_n([x[1] for x in cost_list]) * (1.0 / nr_gpu)
+
+            opt = model.get_optimizer()
+            # run one d_min after one g_min
+            g_min = opt.minimize(g_loss, var_list=model.g_vars,
+                                 colocate_gradients_with_ops=True, name='g_op')
+            with tf.control_dependencies([g_min]):
+                d_min = opt.minimize(d_loss, var_list=model.d_vars,
+                                     colocate_gradients_with_ops=True, name='d_op')
         self.train_op = d_min
 
         super(MultiGPUGANTrainer, self).__init__(config)
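The new 'optimize' scope only wraps the existing ordering trick: d_min carries a control dependency on g_min, so running the single train_op executes one generator step followed by one discriminator step. A self-contained TF 1.x sketch of that ordering (editorial; a toy variable stands in for the real losses):

    import tensorflow as tf

    x = tf.Variable(0.0)
    g_min = x.assign_add(1.0)                  # stands in for the generator step
    with tf.control_dependencies([g_min]):
        d_min = x.assign_add(10.0)             # only runs after g_min has run
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(d_min)
        print(sess.run(x))                     # 11.0 -- both steps ran, g before d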
examples/cifar-convnet.py  (+1 -1)

@@ -155,4 +155,4 @@ if __name__ == '__main__':
     if config.nr_tower <= 1:
         QueueInputTrainer(config).train()
     else:
-        SyncMultiGPUTrainer(config).train()
+        AsyncMultiGPUTrainer(config).train()
tensorpack/graph_builder/model_desc.py  (+5 -2)

@@ -146,8 +146,11 @@ class ModelDesc(ModelDescBase):
         ``tf.GraphKeys.REGULARIZATION_LOSSES`` to the cost automatically.
         """
         cost = self._get_cost()
-        return tf.add(cost, regularize_cost_from_collection(),
-                      name='cost_with_regularizer')
+        reg_cost = regularize_cost_from_collection()
+        if reg_cost:
+            return tf.add(cost, reg_cost, name='cost_with_regularizer')
+        else:
+            return cost
 
     def _get_cost(self, *args):
         return self.cost
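get_cost now wraps the cost in a tf.add only when there actually is a regularization term; with no regularizers, the raw cost tensor is returned unchanged. An editorial TF 1.x sketch of why the unconditional add was undesirable (it always creates an extra node and renames the output, even when adding zero):

    import tensorflow as tf

    cost = tf.reduce_sum(tf.get_variable('w', shape=[3]), name='cost')
    total = tf.add(cost, tf.constant(0.0), name='cost_with_regularizer')
    print(cost.op.name)    # 'cost'
    print(total.op.name)   # 'cost_with_regularizer' -- a new op even though nothing was added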
tensorpack/models/regularize.py  (+2 -3)

@@ -64,7 +64,7 @@ def regularize_cost_from_collection(name='regularize_cost'):
     In replicated mode, will only regularize variables within the current tower.
 
     Returns:
-        a scalar tensor, the regularization loss.
+        a scalar tensor, the regularization loss, or 0
     """
     regularization_losses = set(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
     ctx = get_current_tower_context()

@@ -73,12 +73,11 @@ def regularize_cost_from_collection(name='regularize_cost'):
         # It is only added with variables that are newly created.
         if ctx.has_own_variables:   # be careful of the first tower (name='')
             regularization_losses = ctx.filter_vars_by_vs_name(regularization_losses)
-            print([k.name for k in regularization_losses])
         logger.info("Add REGULARIZATION_LOSSES of {} tensors on the total cost.".format(len(regularization_losses)))
         reg_loss = tf.add_n(list(regularization_losses), name=name)
         return reg_loss
     else:
-        return tf.constant(0, dtype=tf.float32, name='empty_' + name)
+        return 0
 
 
 @layer_register(log_shape=False, use_scope=False)
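regularize_cost_from_collection now returns the plain number 0 (rather than a named zero constant) when the collection is empty, which is what lets the ModelDesc change above skip the add entirely; the stray debug print is also dropped. An editorial TF 1.x sketch of the collection-based pattern, with a hand-added L2 term:

    import tensorflow as tf

    w = tf.get_variable('w', shape=[4])
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                         tf.multiply(1e-4, tf.nn.l2_loss(w), name='w_l2'))
    losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    reg_loss = tf.add_n(list(losses), name='regularize_cost') if losses else 0
    print(reg_loss)   # a Tensor named 'regularize_cost:0' here; the int 0 if the list were empty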
tensorpack/tfutils/optimizer.py  (+20 -19)

@@ -174,24 +174,25 @@ class AccumGradOptimizer(ProxyOptimizer):
         counter = tf.Variable(
             0, name="counter", trainable=False, dtype=tf.int32)
-        ops = []
-        for s, gv in zip(slots, grads_and_vars):
-            g, v = gv
-            ops.append(s.assign_add(g))
-        update_counter = tf.assign_add(counter, 1, name='update_counter')
-        update_slot_op = tf.group(update_counter, *ops, name='update_slot')
-
-        def update_grad():
-            update_op = self._opt.apply_gradients(slots_and_vars)
-            with tf.control_dependencies([update_op]):
-                clear_ops = [tf.assign(s, tf.zeros_like(s)) for s in slots]
-            return tf.group(*clear_ops, name='update_grad')
-
-        pred = tf.equal(tf.mod(counter, self._niter), 0)
-        with tf.control_dependencies([update_slot_op]):
-            if name is None:
-                name = 'cond_update_grad'
-            op = tf.cond(pred, update_grad, tf.no_op, name=name).op
+        with tf.name_scope('AccumGradOptimizer'):
+            ops = []
+            for s, gv in zip(slots, grads_and_vars):
+                g, v = gv
+                ops.append(s.assign_add(g))
+            update_counter = tf.assign_add(counter, 1, name='update_counter')
+            update_slot_op = tf.group(update_counter, *ops, name='update_slot')
+
+            def update_grad():
+                update_op = self._opt.apply_gradients(slots_and_vars)
+                with tf.control_dependencies([update_op]):
+                    clear_ops = [tf.assign(s, tf.zeros_like(s)) for s in slots]
+                return tf.group(*clear_ops, name='update_grad')
+
+            pred = tf.equal(tf.mod(counter, self._niter), 0)
+            with tf.control_dependencies([update_slot_op]):
+                if name is None:
+                    name = 'cond_update_grad'
+                op = tf.cond(pred, update_grad, tf.no_op, name=name).op
         return op

@@ -201,7 +202,7 @@ if __name__ == '__main__':
     x = tf.get_variable('x', shape=[6])
     cost = tf.reduce_sum(tf.abs(x), name='cost')
     opt = tf.train.GradientDescentOptimizer(0.01)
-    # opt = AccumGradOptimizer(opt, 5)
+    opt = AccumGradOptimizer(opt, 5)
     min_op = opt.minimize(cost)
     sess = tf.Session()
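Note that counter stays above the new 'AccumGradOptimizer' scope. One plausible reason (editorial, not stated in the commit): tf.Variable, unlike tf.get_variable, does pick up the active name scope, so creating it inside the scope would change its name. A TF 1.x sketch:

    import tensorflow as tf

    v0 = tf.Variable(0, name='counter')
    with tf.name_scope('AccumGradOptimizer'):
        v1 = tf.Variable(0, name='counter')
    print(v0.name)   # 'counter:0'
    print(v1.name)   # 'AccumGradOptimizer/counter:0'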
tensorpack/tfutils/summary.py  (+20 -5)

@@ -7,11 +7,13 @@ import tensorflow as tf
 import re
 import io
 from six.moves import range
+from contextlib import contextmanager
 from tensorflow.python.training import moving_averages
 
 from ..utils import logger
 from ..utils.develop import log_deprecated
+from ..utils.argtools import graph_memoized
 from ..utils.naming import MOVING_SUMMARY_OPS_KEY
 from .tower import get_current_tower_context
 from .symbolic_functions import rms

@@ -140,6 +142,20 @@ def add_param_summary(*summary_lists):
             perform(p, act)
 
 
+@graph_memoized
+def _get_cached_vs(name):
+    with tf.variable_scope(name) as scope:
+        return scope
+
+
+@contextmanager
+def _enter_vs_reuse_ns(name):
+    vs = _get_cached_vs(name)
+    with tf.variable_scope(vs):
+        with tf.name_scope(vs.original_name_scope):
+            yield vs
+
+
 def add_moving_summary(v, *args, **kwargs):
     """
     Enable moving average summary for some tensors.

@@ -173,19 +189,18 @@ def add_moving_summary(v, *args, **kwargs):
     for c in v:
         name = re.sub('tower[0-9]+/', '', c.op.name)
-        with G.colocate_with(c):
-            with tf.variable_scope('EMA') as vs:
+        with G.colocate_with(c), tf.name_scope(None):
+            with _enter_vs_reuse_ns('EMA') as vs:
                 # will actually create ns EMA_1, EMA_2, etc. tensorflow#6007
                 ema_var = tf.get_variable(name, shape=c.shape, dtype=c.dtype,
                                           initializer=tf.constant_initializer(), trainable=False)
                 ns = vs.original_name_scope
             # first clear NS to avoid duplicated name in variables
-            with tf.name_scope(None), tf.name_scope(ns):
+            with tf.name_scope(ns):
                 ema_op = moving_averages.assign_moving_average(
                     ema_var, c, decay,
                     zero_debias=True, name=name + '_EMA_apply')
-            with tf.name_scope(None):
-                tf.summary.scalar(name + '-summary', ema_op)
+            tf.summary.scalar(name + '-summary', ema_op)
             tf.add_to_collection(coll, ema_op)
     # TODO a new collection to summary every step?
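The new _enter_vs_reuse_ns helper caches the 'EMA' variable scope once per graph and re-enters it together with its original name scope, so repeated calls to add_moving_summary stop spawning EMA_1, EMA_2, ... name scopes (the tensorflow#6007 behavior mentioned in the comment). An editorial TF 1.x sketch of the difference:

    import tensorflow as tf

    # Re-entering a variable scope by name opens a fresh name scope every time:
    for _ in range(2):
        with tf.variable_scope('EMA'):
            print(tf.add(1.0, 1.0).op.name)    # roughly 'EMA/Add', then 'EMA_1/Add'

    # Caching the scope and re-entering it plus its original name scope keeps one scope:
    with tf.variable_scope('EMA_cached') as vs:
        pass
    for _ in range(2):
        with tf.variable_scope(vs), tf.name_scope(vs.original_name_scope):
            print(tf.add(1.0, 1.0).op.name)    # roughly 'EMA_cached/Add', then 'EMA_cached/Add_1'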
tensorpack/train/multigpu.py  (+9 -8)

@@ -403,14 +403,15 @@ class AsyncMultiGPUTrainer(MultiGPUTrainerBase):
         train_ops = []
         opt = model.get_optimizer()
-        for i, grad_and_vars in enumerate(zip(*grad_list)):
-            # Ngpu x 2
-            v = grad_and_vars[0][1]
-            with tf.device(v.device):
-                # will call apply_gradients (therefore gradproc) multiple times
-                train_ops.append(opt.apply_gradients(
-                    grad_and_vars, name='apply_grad_{}'.format(i)))
+        with tf.name_scope('async_apply_gradients'):
+            for i, grad_and_vars in enumerate(zip(*grad_list)):
+                # Ngpu x 2
+                v = grad_and_vars[0][1]
+                with tf.device(v.device):
+                    # will call apply_gradients (therefore gradproc) multiple times
+                    train_ops.append(opt.apply_gradients(
+                        grad_and_vars, name='apply_grad_{}'.format(i)))
         return tf.group(*train_ops, name='train_op'), callbacks
 
     def _setup(self):
         self.train_op, cbs = AsyncMultiGPUTrainer.setup_graph(
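The per-GPU apply_gradients ops themselves are unchanged; they are simply created under one 'async_apply_gradients' scope before being grouped into the single train_op. An editorial TF 1.x sketch of that shape, with trivial assign ops standing in for apply_gradients:

    import tensorflow as tf

    train_ops = []
    with tf.name_scope('async_apply_gradients'):
        for i in range(2):
            v = tf.Variable(0.0, name='v{}'.format(i))
            with tf.device(v.device):      # empty device string here; a GPU in the real trainer
                train_ops.append(tf.assign_add(v, 1.0, name='apply_grad_{}'.format(i)))
    train_op = tf.group(*train_ops, name='train_op')
    print(train_ops[0].op.name)   # 'async_apply_gradients/apply_grad_0'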
tensorpack/utils/argtools.py  (+1 -1)

@@ -11,7 +11,7 @@ if six.PY2:
 else:
     import functools
 
-__all__ = ['map_arg', 'memoized', 'shape2d', 'shape4d',
+__all__ = ['map_arg', 'memoized', 'graph_memoized', 'shape2d', 'shape4d',
            'memoized_ignoreargs', 'log_once']