Shashank Suhas / seminar-breakout, commit 54c5a42d

Commit 54c5a42d, authored Feb 08, 2018 by Yuxin Wu

    Correct average=False && use_nccl=False

parent f4f41711

Showing 5 changed files with 20 additions and 15 deletions (+20, -15):
    tensorpack/callbacks/prof.py            +3   -0
    tensorpack/graph_builder/training.py    +3   -1
    tensorpack/graph_builder/utils.py       +12  -7
    tensorpack/tfutils/common.py            +1   -7
    tensorpack/train/trainers.py            +1   -0
tensorpack/callbacks/prof.py

@@ -100,6 +100,9 @@ class GPUUtilizationTracker(Callback):
                 if stop_evt.is_set():   # or on exit
                     return
                 evt.clear()
+                # Ignore the last datapoint. Usually is zero, makes us underestimate the util.
+                stats -= data
+                cnt -= 1
                 rst_queue.put(stats / cnt)
                 break
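The three added lines put the comment above them into effect: the final utilization sample of an epoch is dropped before the average is reported, since it is usually taken after the GPUs have already gone idle and would bias the estimate downward. A minimal standalone sketch of the same idea (the function name and plain-list input are illustrative, not part of tensorpack):

    def mean_utilization(samples):
        # samples: per-interval GPU utilization readings collected during one epoch.
        # Drop the last reading; it is typically ~0 because it is measured after the
        # epoch has finished, and keeping it would underestimate the average.
        if len(samples) > 1:
            samples = samples[:-1]
        return sum(samples) / len(samples)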
tensorpack/graph_builder/training.py

@@ -221,7 +221,9 @@ class SyncMultiGPUReplicatedBuilder(DataParallelBuilder):
         if self._use_nccl:
             self.grads = allreduce_grads(grad_list, average=self._average)  # #gpu x #param x 2
         else:
-            agg_grad_and_vars = average_grads(grad_list, colocation=False, devices=['/cpu:0'])   # #param x 2
+            agg_grad_and_vars = average_grads(
+                grad_list, colocation=False,
+                devices=['/cpu:0'], average=self._average)    # #param x 2
             self.grads = []  # #gpu x #param x 2
             for grad_and_vars in grad_list:   # grad_and_vars: #paramx2
                 # take v from each tower, and g from average.
tensorpack/graph_builder/utils.py

@@ -133,7 +133,7 @@ def allreduce_grads(all_grads, average):
     return ret


-def average_grads(all_grads, colocation=True, devices=None):
+def average_grads(all_grads, colocation=True, devices=None, average=True):
     """
     Average the gradients.

@@ -143,6 +143,7 @@ def average_grads(all_grads, colocation=True, devices=None):
         colocation (bool): colocate gradient averaging on the device of the variable.
         devices (list[str]): assign the averaging to these device in
             round-robin. Cannot be used together with ``colocation``.
+        average (bool): do average or sum

     Returns:
         (N x 2): A list of N (grad, var) tuples, where grad is averaged over K.

@@ -154,6 +155,13 @@ def average_grads(all_grads, colocation=True, devices=None):
     nr_tower = len(all_grads)
     if nr_tower == 1:
         return all_grads[0]

+    def aggregate(grads):
+        if average:
+            return tf.multiply(tf.add_n(grads), 1.0 / nr_tower)
+        else:
+            return tf.add_n(grads)
+
     ret = []
     with tf.name_scope('AvgGrad'):
         for idx, grad_and_vars in enumerate(zip(*all_grads)):

@@ -163,16 +171,13 @@ def average_grads(all_grads, colocation=True, devices=None):
             if colocation:
                 with tf.device(v.device):       # colocate summed grad with var
-                    grad = tf.multiply(
-                        tf.add_n(grads), 1.0 / nr_tower)
+                    grad = aggregate(grads)
             elif devices is None:
-                grad = tf.multiply(
-                    tf.add_n(grads), 1.0 / nr_tower)
+                grad = aggregate(grads)
             else:
                 dev = devices[idx % len(devices)]
                 with tf.device(dev):
-                    grad = tf.multiply(
-                        tf.add_n(grads), 1.0 / nr_tower)
+                    grad = aggregate(grads)
             ret.append((grad, v))
     return ret
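The new `average` flag only changes the final reduction step: per-variable gradients from all towers are still summed with `tf.add_n`, and the sum is scaled by `1.0 / nr_tower` only when `average=True`. A small self-contained sketch of that aggregation step (assumes TensorFlow 1.x; `aggregate_one` and `tower_grads` are illustrative names, not tensorpack APIs):

    import tensorflow as tf

    def aggregate_one(grads, average=True):
        # grads: the gradient of a single variable, one tensor per tower.
        summed = tf.add_n(grads)
        if average:
            return tf.multiply(summed, 1.0 / len(grads))
        return summed

    # The same variable's gradient as seen by two towers:
    tower_grads = [tf.constant([2.0, 4.0]), tf.constant([6.0, 8.0])]
    avg_grad = aggregate_one(tower_grads, average=True)    # [4.0, 6.0]
    sum_grad = aggregate_one(tower_grads, average=False)   # [8.0, 12.0]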
tensorpack/tfutils/common.py

@@ -58,13 +58,7 @@ def get_global_step_var():
     """
     scope = tf.VariableScope(reuse=False, name='')  # the root vs
     with tf.variable_scope(scope):
-        if get_tf_version_number() <= 1.0:
-            var = tf.get_variable('global_step',
-                                  initializer=tf.constant(0, dtype=tf.int64),
-                                  trainable=False, dtype=tf.int64)
-            tf.add_to_collection(tf.GraphKeys.GLOBAL_STEP, var)
-        else:
-            var = tf.train.get_or_create_global_step()
+        var = tf.train.get_or_create_global_step()
     return var
tensorpack/train/trainers.py

@@ -143,6 +143,7 @@ class SyncMultiGPUTrainerReplicated(SingleCostTrainer):
         Args:
             gpus (int or [int]): list of GPU ids.
             average (bool): whether to average or sum gradients.
+            use_nccl (bool): use NCCL or TensorFlow copy to reduce.
         """
         self.devices = gpus
         self._builder = SyncMultiGPUReplicatedBuilder(gpus, average, use_nccl)
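With this fix, the combination named in the commit message behaves as documented. A hedged usage sketch (assumes a complete tensorpack training setup around it; the GPU list is arbitrary):

    from tensorpack.train import SyncMultiGPUTrainerReplicated

    # Sum gradients instead of averaging them, and reduce through the CPU copy
    # path (average_grads) rather than NCCL -- the case this commit corrects.
    trainer = SyncMultiGPUTrainerReplicated([0, 1], average=False, use_nccl=False)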