6ecaab67
Commit
6ecaab67
authored
Jul 13, 2016
by
Yuxin Wu
add a bunch of scope & names for debugging
parent a9a3b7d1
Showing 5 changed files with 54 additions and 36 deletions (+54 -36)
tensorpack/callbacks/base.py     +2 -1
tensorpack/tfutils/gradproc.py   +2 -1
tensorpack/tfutils/summary.py    +26 -22
tensorpack/train/multigpu.py     +13 -11
tensorpack/train/trainer.py      +11 -1
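All five changes follow the same pattern: graph-construction code is wrapped in a tf.name_scope(...) block, and the grouped train ops get explicit names, so the resulting ops are easier to locate in TensorBoard's graph view and in error messages. As a minimal sketch (assuming the graph-mode TensorFlow API of this era; the names below are illustrative, not from the commit), name_scope simply prefixes the names of ops created inside it:

import tensorflow as tf

with tf.Graph().as_default():
    a = tf.constant(1.0, name='a')           # op name: "a"
    with tf.name_scope('act_summary'):
        b = tf.identity(a, name='b')         # op name: "act_summary/b"
    print(a.op.name, b.op.name)              # -> a act_summary/b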
tensorpack/callbacks/base.py
@@ -48,7 +48,8 @@ class Callback(object):
         self.graph = tf.get_default_graph()
         self.epoch_num = self.trainer.config.starting_epoch - 1
         # self.epoch_num is always the number of epochs that finished updating parameters.
-        self._setup_graph()
+        with tf.name_scope(type(self).__name__):
+            self._setup_graph()

     def _setup_graph(self):
         pass
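With this change, every op a callback creates in its _setup_graph() is placed under a scope named after the concrete subclass. A minimal sketch of the effect, using a hypothetical callback class (not tensorpack's own) and assuming graph-mode TensorFlow:

import tensorflow as tf

class MyCallback(object):                    # stand-in for tensorpack's Callback
    def setup_graph(self):
        with tf.name_scope(type(self).__name__):
            self._setup_graph()

    def _setup_graph(self):
        self.counter = tf.constant(0, name='counter')

with tf.Graph().as_default():
    cb = MyCallback()
    cb.setup_graph()
    print(cb.counter.op.name)                # -> "MyCallback/counter"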
tensorpack/tfutils/gradproc.py
@@ -22,7 +22,8 @@ class GradientProcessor(object):
         :param grads: list of (grad, var)
         :returns: symbolic gradients with the same type as input
         """
-        return self._process(grads)
+        with tf.name_scope(type(self).__name__):
+            return self._process(grads)

     @abstractmethod
     def _process(self, grads):
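The same idea applies to gradient processors: whatever ops a subclass's _process() builds now appear under the subclass's name. A hypothetical sketch (the ClipGradient class below is illustrative, not tensorpack's API), again assuming graph-mode TensorFlow:

import tensorflow as tf

class GradientProcessor(object):
    def process(self, grads):
        with tf.name_scope(type(self).__name__):
            return self._process(grads)

class ClipGradient(GradientProcessor):       # hypothetical example processor
    def _process(self, grads):
        return [(tf.clip_by_value(g, -1.0, 1.0), v) for g, v in grads]

with tf.Graph().as_default():
    w = tf.constant([1.0], name='w')         # stand-in for a trainable variable
    g = tf.constant([5.0], name='grad_w')
    processed = ClipGradient().process([(g, w)])
    print(processed[0][0].op.name)           # name is prefixed with "ClipGradient/"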
tensorpack/tfutils/summary.py
@@ -32,11 +32,12 @@ def add_activation_summary(x, name=None):
         "Summary a scalar with histogram? Maybe use scalar instead. FIXME!"
     if name is None:
         name = x.name
-    tf.histogram_summary(name + '/activation', x)
-    tf.scalar_summary(name + '/activation_sparsity', tf.nn.zero_fraction(x))
-    tf.scalar_summary(name + '/activation_rms', tf.sqrt(tf.reduce_mean(tf.square(x))))
+    with tf.name_scope('act_summary'):
+        tf.histogram_summary(name + '/activation', x)
+        tf.scalar_summary(name + '/activation_sparsity', tf.nn.zero_fraction(x))
+        tf.scalar_summary(name + '/activation_rms', tf.sqrt(tf.reduce_mean(tf.square(x))))

 def add_param_summary(summary_lists):
     """
@@ -70,14 +71,15 @@ def add_param_summary(summary_lists):
     import re
     params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
-    for p in params:
-        name = p.name
-        for rgx, actions in summary_lists:
-            if not rgx.endswith('$'):
-                rgx = rgx + '(:0)?$'
-            if re.match(rgx, name):
-                for act in actions:
-                    perform(p, act)
+    with tf.name_scope('param_summary'):
+        for p in params:
+            name = p.name
+            for rgx, actions in summary_lists:
+                if not rgx.endswith('$'):
+                    rgx = rgx + '(:0)?$'
+                if re.match(rgx, name):
+                    for act in actions:
+                        perform(p, act)

 def add_moving_summary(v, *args):
     """
@@ -94,13 +96,15 @@ def summary_moving_average():
     MOVING_SUMMARY_VARS_KEY.
     :returns: a op to maintain these average.
     """
-    global_step_var = get_global_step_var()
-    averager = tf.train.ExponentialMovingAverage(
-        0.99, num_updates=global_step_var, name='moving_averages')
-    vars_to_summary = tf.get_collection(MOVING_SUMMARY_VARS_KEY)
-    avg_maintain_op = averager.apply(vars_to_summary)
-    for idx, c in enumerate(vars_to_summary):
-        name = re.sub('tower[p0-9]+/', '', c.op.name)
-        tf.scalar_summary(name, averager.average(c))
-    return avg_maintain_op
+    with tf.name_scope('EMA_summary'):
+        global_step_var = get_global_step_var()
+        with tf.name_scope(None):
+            averager = tf.train.ExponentialMovingAverage(
+                0.99, num_updates=global_step_var, name='EMA')
+        vars_to_summary = tf.get_collection(MOVING_SUMMARY_VARS_KEY)
+        avg_maintain_op = averager.apply(vars_to_summary)
+        for idx, c in enumerate(vars_to_summary):
+            name = re.sub('tower[p0-9]+/', '', c.op.name)
+            tf.scalar_summary(name, averager.average(c))
+        return avg_maintain_op
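For context on the hunk above (a hedged reading, not stated in the commit): ExponentialMovingAverage.apply() creates shadow variables plus an op that updates them, average() returns the shadow value that the summary reads, and the inner tf.name_scope(None) resets to the top-level scope so the averager itself is not nested under 'EMA_summary'. A minimal sketch of the pattern, assuming the TF 1.x-era API:

import tensorflow as tf

with tf.Graph().as_default():
    cost = tf.Variable(4.0, name='cost')                  # value to smooth
    averager = tf.train.ExponentialMovingAverage(0.99, name='EMA')
    maintain_op = averager.apply([cost])                  # op that updates the shadow variable
    smoothed = averager.average(cost)                     # shadow variable holding the EMA
    # run maintain_op every step, then summarize `smoothed` instead of the raw value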
tensorpack/train/multigpu.py
@@ -25,14 +25,15 @@ class MultiGPUTrainer(QueueInputTrainer):
     @staticmethod
     def _average_grads(tower_grads):
         ret = []
-        for grad_and_vars in zip(*tower_grads):
-            v = grad_and_vars[0][1]
-            try:
-                grad = tf.add_n([x[0] for x in grad_and_vars]) / float(len(tower_grads))
-            except:
-                logger.error("Error while processing gradients of {}".format(v.name))
-                raise
-            ret.append((grad, v))
+        with tf.name_scope('average_grad'):
+            for grad_and_vars in zip(*tower_grads):
+                v = grad_and_vars[0][1]
+                try:
+                    grad = tf.add_n([x[0] for x in grad_and_vars]) / float(len(tower_grads))
+                except:
+                    logger.error("Error while processing gradients of {}".format(v.name))
+                    raise
+                ret.append((grad, v))
         return ret

     def _multi_tower_grads(self):
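As a side note on the loop being scoped above: tower_grads is a list with one [(grad, var), ...] list per GPU tower, and zip(*tower_grads) transposes it so each group holds a single variable's gradient from every tower, which is then summed with tf.add_n and divided by the number of towers. A tiny plain-Python sketch of the regrouping (illustrative values only):

tower_grads = [
    [('g0_w', 'w'), ('g0_b', 'b')],   # (grad, var) pairs computed on tower 0
    [('g1_w', 'w'), ('g1_b', 'b')],   # (grad, var) pairs computed on tower 1
]
for grad_and_vars in zip(*tower_grads):
    v = grad_and_vars[0][1]                    # the shared variable, e.g. 'w'
    grads = [g for g, _ in grad_and_vars]      # that variable's gradient from every tower
    print(v, grads)                            # w ['g0_w', 'g1_w'], then b ['g0_b', 'g1_b']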
@@ -73,7 +74,7 @@ class SyncMultiGPUTrainer(MultiGPUTrainer):
         self.train_op = tf.group(
             self.config.optimizer.apply_gradients(grads, get_global_step_var()),
-            summary_moving_average())
+            summary_moving_average(), name='train_op')
         describe_model()

         with freeze_collection(self.SUMMARY_BACKUP_KEYS):
@@ -92,14 +93,15 @@ class AsyncMultiGPUTrainer(MultiGPUTrainer):
         # pretend to average the grads, in order to make async and
         # sync have consistent effective learning rate
         def scale(grads):
-            return [(grad / self.config.nr_tower, var) for grad, var in grads]
+            with tf.name_scope('async_scale_grad'):
+                return [(grad / self.config.nr_tower, var) for grad, var in grads]
         grad_list = map(scale, grad_list)
         grad_list = [self.process_grads(g) for g in grad_list]

         # use grad from the first tower for iteration in main thread
         self.train_op = tf.group(
             self.config.optimizer.apply_gradients(grad_list[0], get_global_step_var()),
-            summary_moving_average())
+            summary_moving_average(), name='train_op')
         describe_model()

         # prepare train_op for the rest of the towers
tensorpack/train/trainer.py
@@ -175,13 +175,23 @@ class QueueInputTrainer(Trainer):
         self.train_op = tf.group(
             self.config.optimizer.apply_gradients(grads, get_global_step_var()),
-            summary_moving_average())
+            summary_moving_average(), 'train_op')
         self.main_loop()

     def run_step(self):
         """ just run self.train_op"""
         self.sess.run([self.train_op])
+        #run_metadata = tf.RunMetadata()
+        #self.sess.run([self.train_op],
+                #options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
+                #run_metadata=run_metadata
+                #)
+        #from tensorflow.python.client import timeline
+        #trace = timeline.Timeline(step_stats=run_metadata.step_stats)
+        #trace_file = open('timeline.ctf.json', 'w')
+        #trace_file.write(trace.generate_chrome_trace_format())
+        #import sys; sys.exit()

     def _trigger_epoch(self):
         # need to run summary_op every epoch
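The commented-out block in run_step() is a per-step profiling hook. Cleaned up into a standalone form, it would look roughly like the sketch below (TF 1.x-era API assumed; the matmul is only a stand-in for the real train_op). The resulting timeline.ctf.json can be loaded at chrome://tracing.

import tensorflow as tf
from tensorflow.python.client import timeline

with tf.Graph().as_default(), tf.Session() as sess:
    x = tf.random_normal([1000, 1000])
    step = tf.matmul(x, x)                     # stand-in for self.train_op

    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    sess.run(step, options=run_options, run_metadata=run_metadata)

    trace = timeline.Timeline(step_stats=run_metadata.step_stats)
    with open('timeline.ctf.json', 'w') as f:
        f.write(trace.generate_chrome_trace_format())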