Commit 3b5ee108, authored Feb 12, 2017 by Yuxin Wu
add average_cost option in SyncMultiGPUTrainer
Parent: 53549d52
Showing 3 changed files with 60 additions and 13 deletions (+60 −13):
    tensorpack/train/feedfree.py     +1  −1
    tensorpack/train/input_data.py   +3  −4
    tensorpack/train/multigpu.py     +56 −8
tensorpack/train/feedfree.py
@@ -51,7 +51,7 @@ class SingleCostFeedfreeTrainer(FeedfreeTrainerBase):
         grads = opt.compute_gradients(
             cost_var,
             gate_gradients=tf.train.Optimizer.GATE_NONE,
-            colocate_gradients_with_ops=False)
+            colocate_gradients_with_ops=True)
         return cost_var, grads

     def run_step(self):
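For context: colocate_gradients_with_ops=True asks TensorFlow to place each gradient op on the same device as the forward op it differentiates, which is what a multi-tower trainer wants once the forward graph spans several GPUs. A minimal sketch of the flag in isolation (TF 1.x graph mode; the toy variable, cost, and optimizer are assumptions for illustration, not part of this commit):

import tensorflow as tf  # TF 1.x graph-mode API, matching the code in this diff

# Illustrative only: a toy cost so compute_gradients has something to differentiate.
with tf.device('/gpu:0'):
    w = tf.get_variable('w', initializer=tf.ones([4]))
    cost_var = tf.reduce_sum(tf.square(w))

opt = tf.train.GradientDescentOptimizer(0.1)
grads = opt.compute_gradients(
    cost_var,
    gate_gradients=tf.train.Optimizer.GATE_NONE,
    # With True, each gradient op is placed on the same device as the op it
    # differentiates ('/gpu:0' here) rather than on the default device.
    colocate_gradients_with_ops=True)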
tensorpack/train/input_data.py
@@ -154,10 +154,9 @@ class QueueInput(FeedfreeInput):
             qv.set_shape(v.get_shape())

         # test the overhead of queue
         # with tf.device('/gpu:0'):
-        #     ret = [tf.Variable(tf.random_normal([128,224,224,3],
-        #                        dtype=tf.float32), trainable=False),
-        #            tf.Variable(tf.ones([128], dtype=tf.int32), trainable=False)]
+        #     ret = [tf.Variable(tf.random_normal([64,224,224,3],
+        #                        dtype=tf.float32), trainable=False),
+        #            tf.Variable(tf.ones([64], dtype=tf.int32), trainable=False)]
         return ret
tensorpack/train/multigpu.py
@@ -46,6 +46,26 @@ class MultiGPUTrainer(Trainer):
                     backup = backup_collection(SUMMARY_BACKUP_KEYS)
         restore_collection(backup)
         return grad_list

+    @staticmethod
+    def _multi_tower_costs(towers, get_tower_cost_func):
+        logger.info("Training a model of {} tower".format(len(towers)))
+
+        cost_list = []
+        global_scope = tf.get_variable_scope()
+        for idx, t in enumerate(towers):
+            with tf.device('/gpu:{}'.format(t)), \
+                    tf.variable_scope(global_scope, reuse=idx > 0), \
+                    TowerContext('tower{}'.format(idx)):
+                logger.info("Building graph for training tower {}...".format(idx))
+
+                cost_list.append(get_tower_cost_func())
+
+                if idx == 0:
+                    # avoid repeated summary from each device
+                    backup = backup_collection(SUMMARY_BACKUP_KEYS)
+        restore_collection(backup)
+        return cost_list
+
 class SyncMultiGPUTrainer(MultiGPUTrainer,
                           SingleCostFeedfreeTrainer,
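The new _multi_tower_costs mirrors the existing _multi_tower_grads: each tower is built on its own GPU while re-entering the same variable scope with reuse=idx > 0, so every tower shares one set of weights. A standalone sketch of that reuse pattern (TF 1.x graph mode; the toy model body, input shape, and tower count are illustrative assumptions, not from this commit):

import tensorflow as tf  # TF 1.x graph mode, as used by tensorpack at the time

def tower_cost(x):
    w = tf.get_variable('w', shape=[784, 10])  # created on tower 0, reused afterwards
    return tf.reduce_mean(tf.matmul(x, w))

x = tf.placeholder(tf.float32, [None, 784])

cost_list = []
global_scope = tf.get_variable_scope()
for idx in range(2):  # e.g. towers on /gpu:0 and /gpu:1
    with tf.device('/gpu:{}'.format(idx)), \
            tf.variable_scope(global_scope, reuse=idx > 0):
        # reuse=idx > 0: every tower after the first picks up the existing 'w'
        # instead of creating a second copy, so all towers share weights.
        cost_list.append(tower_cost(x))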
@@ -55,10 +75,15 @@ class SyncMultiGPUTrainer(MultiGPUTrainer,
     from each tower and averages them.
     """

-    def __init__(self, config, input_queue=None, predict_tower=None):
+    def __init__(self, config, input_queue=None,
+                 average_cost=False,
+                 predict_tower=None):
         """
         Args:
             config, input_queue: same as in :class:`QueueInputTrainer`.
+            average_cost (bool): average the cost (instead of gradients) from
+                each tower and do backprop only once. This should make no
+                difference mathematically, but may affect speed.
         """
         if config.dataflow is not None:
             self._input_method = QueueInput(config.dataflow, input_queue)
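The docstring's claim that averaging costs instead of gradients should make no difference mathematically follows from linearity of differentiation: grad((c1 + ... + cn) / n) = (grad(c1) + ... + grad(cn)) / n. A small numeric check of that identity (TF 1.x graph mode; the toy variable and costs are assumptions for illustration):

import numpy as np
import tensorflow as tf  # TF 1.x graph-mode API, as in this commit's era

# Two toy "tower" costs over a shared variable.
w = tf.Variable([1.0, 2.0])
costs = [tf.reduce_sum(w * float(k)) for k in range(1, 3)]

# Path 1: average the costs, then differentiate once.
avg_cost = tf.add_n(costs) / len(costs)
g_from_avg_cost = tf.gradients(avg_cost, w)[0]

# Path 2: differentiate each cost, then average the gradients.
per_tower_grads = [tf.gradients(c, w)[0] for c in costs]
g_from_avg_grads = tf.add_n(per_tower_grads) / len(per_tower_grads)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    a, b = sess.run([g_from_avg_cost, g_from_avg_grads])
    assert np.allclose(a, b)  # identical up to float error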
@@ -75,6 +100,7 @@ class SyncMultiGPUTrainer(MultiGPUTrainer,
         self._setup_predictor_factory()
         assert len(config.tower) >= 1, "MultiGPUTrainer must be used with at least one GPU."
         assert tf.test.is_gpu_available()
+        self.average_cost = average_cost

     @staticmethod
     def _average_grads(tower_grads):
@@ -102,15 +128,37 @@ class SyncMultiGPUTrainer(MultiGPUTrainer,

     def _setup(self):
         super(SyncMultiGPUTrainer, self)._setup()
-        grad_list = MultiGPUTrainer._multi_tower_grads(
-            self.config.tower, lambda: self._get_cost_and_grad()[1])
+        if not self.average_cost:
+            grad_list = MultiGPUTrainer._multi_tower_grads(
+                self.config.tower, lambda: self._get_cost_and_grad()[1])

-        # debug tower performance:
-        # ops = [k[0] for k in grad_list[1]] + [k[0] for k in grad_list[0]]
-        # self.train_op = tf.group(*ops)
-        # return
+            # debug tower performance (without update):
+            # ops = [k[0] for k in grad_list[1]] + [k[0] for k in grad_list[0]]
+            # self.train_op = tf.group(*ops)
+            # return

-        grads = SyncMultiGPUTrainer._average_grads(grad_list)
-        # grads = grad_list[0]
+            grads = SyncMultiGPUTrainer._average_grads(grad_list)
+            # grads = grad_list[0]
+        else:
+            def get_cost():
+                actual_inputs = self._get_input_tensors()
+                self.model.build_graph(actual_inputs)
+                return self.model.get_cost()
+            cost_list = MultiGPUTrainer._multi_tower_costs(
+                self.config.tower, get_cost)
+            cost = tf.multiply(tf.add_n(cost_list), 1.0 / len(cost_list),
+                               name='averaged_cost')
+
+            opt = self.config.optimizer
+            if opt is None:
+                opt = self.model.get_optimizer()
+                self.config.optimizer = opt
+            grads = opt.compute_gradients(
+                cost,
+                gate_gradients=tf.train.Optimizer.GATE_NONE,
+                colocate_gradients_with_ops=True)

         grads = apply_grad_processors(grads,
                                       self.model.get_gradient_processor())
         self.train_op = self.config.optimizer.apply_gradients(grads, name='min_op')
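With this change a caller can opt into cost averaging when constructing the trainer. A hedged usage sketch (building the tensorpack TrainConfig is outside this diff and is elided here; only the average_cost keyword comes from this commit):

from tensorpack.train.multigpu import SyncMultiGPUTrainer

config = ...  # an existing tensorpack TrainConfig with dataflow, optimizer and tower set (construction elided)
trainer = SyncMultiGPUTrainer(config, average_cost=True)  # average per-tower costs, backprop once
trainer.train()  # otherwise identical to the default gradient-averaging path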