Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
seminar-breakout
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shashank Suhas
seminar-breakout
Commits
06bb5142
Commit
06bb5142
authored
Jul 31, 2017
by
Yuxin Wu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix SimpleTrainer. expose setup_graph for AsyncMultiGPUTrainer
parent
8837d748
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
28 additions
and
11 deletions
+28
-11
tensorpack/train/multigpu.py
tensorpack/train/multigpu.py
+27
-10
tensorpack/train/simple.py
tensorpack/train/simple.py
+1
-1
No files found.
tensorpack/train/multigpu.py
View file @
06bb5142
...
@@ -332,25 +332,37 @@ class AsyncMultiGPUTrainer(MultiGPUTrainerBase):
...
@@ -332,25 +332,37 @@ class AsyncMultiGPUTrainer(MultiGPUTrainerBase):
self
.
_scale_gradient
=
scale_gradient
self
.
_scale_gradient
=
scale_gradient
super
(
AsyncMultiGPUTrainer
,
self
)
.
__init__
(
config
)
super
(
AsyncMultiGPUTrainer
,
self
)
.
__init__
(
config
)
def
_setup
(
self
):
@
staticmethod
super
(
AsyncMultiGPUTrainer
,
self
)
.
_setup
()
def
setup_graph
(
model
,
input
,
scale_gradient
,
tower
):
raw_devices
=
[
'/gpu:{}'
.
format
(
k
)
for
k
in
self
.
config
.
tower
]
"""
Args:
model (ModelDesc):
input (InputSource):
scale_gradient (bool):
tower (list[int]):
Returns:
tf.Operation: the training op
[Callback]: the callbacks to be added
"""
input
.
setup
(
model
.
get_inputs_desc
())
raw_devices
=
[
'/gpu:{}'
.
format
(
k
)
for
k
in
tower
]
devices
=
[
LeastLoadedDeviceSetter
(
d
,
raw_devices
)
for
d
in
raw_devices
]
devices
=
[
LeastLoadedDeviceSetter
(
d
,
raw_devices
)
for
d
in
raw_devices
]
grad_list
=
MultiGPUTrainerBase
.
build_on_multi_tower
(
grad_list
=
MultiGPUTrainerBase
.
build_on_multi_tower
(
self
.
config
.
tower
,
tower
,
lambda
:
MultiGPUTrainerBase
.
_build_graph_get_grads
(
lambda
:
MultiGPUTrainerBase
.
_build_graph_get_grads
(
model
,
input
),
devices
)
self
.
model
,
self
.
_input_source
),
devices
)
MultiGPUTrainerBase
.
_check_grad_list
(
grad_list
)
MultiGPUTrainerBase
.
_check_grad_list
(
grad_list
)
if
s
elf
.
_scale_gradient
and
self
.
config
.
nr_tower
>
1
:
if
s
cale_gradient
and
len
(
tower
)
>
1
:
# pretend to average the grads, in order to make async and
# pretend to average the grads, in order to make async and
# sync have consistent effective learning rate
# sync have consistent effective learning rate
gradproc
=
ScaleGradient
((
'.*'
,
1.0
/
self
.
config
.
nr_tower
),
verbose
=
False
)
gradproc
=
ScaleGradient
((
'.*'
,
1.0
/
len
(
tower
)
),
verbose
=
False
)
grad_list
=
[
gradproc
.
process
(
gv
)
for
gv
in
grad_list
]
grad_list
=
[
gradproc
.
process
(
gv
)
for
gv
in
grad_list
]
# Ngpu x Nvar x 2
# Ngpu x Nvar x 2
train_ops
=
[]
train_ops
=
[]
opt
=
self
.
model
.
get_optimizer
()
opt
=
model
.
get_optimizer
()
for
i
,
grad_and_vars
in
enumerate
(
zip
(
*
grad_list
)):
for
i
,
grad_and_vars
in
enumerate
(
zip
(
*
grad_list
)):
# Ngpu x 2
# Ngpu x 2
v
=
grad_and_vars
[
0
][
1
]
v
=
grad_and_vars
[
0
][
1
]
...
@@ -358,4 +370,9 @@ class AsyncMultiGPUTrainer(MultiGPUTrainerBase):
...
@@ -358,4 +370,9 @@ class AsyncMultiGPUTrainer(MultiGPUTrainerBase):
# will call apply_gradients (therefore gradproc) multiple times
# will call apply_gradients (therefore gradproc) multiple times
train_ops
.
append
(
opt
.
apply_gradients
(
train_ops
.
append
(
opt
.
apply_gradients
(
grad_and_vars
,
name
=
'apply_grad_{}'
.
format
(
i
)))
grad_and_vars
,
name
=
'apply_grad_{}'
.
format
(
i
)))
self
.
train_op
=
tf
.
group
(
*
train_ops
,
name
=
'train_op'
)
return
tf
.
group
(
*
train_ops
,
name
=
'train_op'
),
input
.
get_callbacks
()
def
_setup
(
self
):
self
.
train_op
,
cbs
=
AsyncMultiGPUTrainer
.
setup_graph
(
self
.
model
,
self
.
_input_source
,
self
.
_scale_gradient
,
self
.
config
.
tower
)
self
.
config
.
callbacks
.
extend
(
cbs
)
tensorpack/train/simple.py
View file @
06bb5142
...
@@ -36,7 +36,7 @@ class SimpleTrainer(Trainer):
...
@@ -36,7 +36,7 @@ class SimpleTrainer(Trainer):
super
(
SimpleTrainer
,
self
)
.
__init__
(
config
)
super
(
SimpleTrainer
,
self
)
.
__init__
(
config
)
@
staticmethod
@
staticmethod
def
setup_graph
(
self
,
model
,
input
):
def
setup_graph
(
model
,
input
):
"""
"""
Setup graph for simple trainer.
Setup graph for simple trainer.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment