Commit 12bf21bc, authored Jul 14, 2016 by Yuxin Wu

asynctrainer global counter

parent 6ecaab67
Showing 1 changed file with 21 additions and 5 deletions

tensorpack/train/multigpu.py  +21 -5
tensorpack/train/multigpu.py

@@ -4,6 +4,7 @@
 # Author: Yuxin Wu <ppwwyyxxc@gmail.com>

 import tensorflow as tf
+import itertools, re
 from six.moves import zip, range
 from ..utils import *
@@ -104,30 +105,45 @@ class AsyncMultiGPUTrainer(MultiGPUTrainer):
             summary_moving_average(), name='train_op')
         describe_model()
+        self._start_async_threads(grad_list)
+
+        with freeze_collection(self.SUMMARY_BACKUP_KEYS):
+            self._build_predict_tower()
+        self.main_loop()
+
+    def _start_async_threads(self, grad_list):
         # prepare train_op for the rest of the towers
+        # itertools.count is atomic w.r.t. python threads
+        self.async_step_counter = itertools.count()
         self.training_threads = []
         for k in range(1, self.config.nr_tower):
             train_op = self.config.optimizer.apply_gradients(grad_list[k])
-            f = lambda op=train_op: self.sess.run([op])  # avoid late-binding
+            def f(op=train_op):  # avoid late-binding
+                self.sess.run([op])
+                self.async_step_counter.next()
             th = LoopThread(f)
             th.pause()
             th.start()
             self.training_threads.append(th)
         self.async_running = False
-
-        with freeze_collection(self.SUMMARY_BACKUP_KEYS):
-            self._build_predict_tower()
-        self.main_loop()

     def run_step(self):
         if not self.async_running:
             self.async_running = True
             for th in self.training_threads:  # resume all threads
                 th.resume()
+        self.async_step_counter.next()
         super(AsyncMultiGPUTrainer, self).run_step()

     def _trigger_epoch(self):
         self.async_running = False
         for th in self.training_threads:
             th.pause()
+        try:
+            async_step_total_cnt = int(re.findall(
+                '[0-9]+', self.async_step_counter.__str__())[0])
+            self.write_scalar_summary(
+                'async_global_step', async_step_total_cnt)
+        except:
+            pass
         super(AsyncMultiGPUTrainer, self)._trigger_epoch()
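A note on the `def f(op=train_op)` change: both the old lambda and the new function use a default argument to defeat Python's late-binding closure rules, since `train_op` is rebound on every loop iteration. The switch from lambda to `def` itself is only because the thread body now needs two statements (run the op, then bump the counter). A minimal standalone sketch of the pitfall, not tensorpack code:

# Closures look up loop variables at call time, so every callback
# sees the final value of k:
callbacks = [lambda: k for k in range(3)]
print([cb() for cb in callbacks])    # [2, 2, 2]

# A default argument is evaluated at definition time, freezing the
# current value -- the same trick as `op=train_op` in the diff:
callbacks = [lambda k=k: k for k in range(3)]
print([cb() for cb in callbacks])    # [0, 1, 2]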
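On the counter itself: `next()` on an `itertools.count` is a single call into C code, so under CPython's GIL two threads cannot interleave inside it and increments are never lost even without a lock; this is a CPython implementation detail, not a language guarantee. `count` also has no API to peek at its current value, which is why `_trigger_epoch` parses the number out of the object's repr. A standalone sketch of both tricks (Python 3 spelling, `next(counter)` rather than `counter.next()`):

import itertools
import re
import threading

counter = itertools.count()          # C-implemented iterator, starts at 0

def worker():
    for _ in range(100000):
        next(counter)                # no lock needed under CPython's GIL

threads = [threading.Thread(target=worker) for _ in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()

# repr(counter) is e.g. "count(400000)" -- the next value to be yielded,
# which is what the diff extracts with re.findall:
total = int(re.findall('[0-9]+', repr(counter))[0])
print(total)                         # 400000

In the trainer, tower 0 runs in `run_step` on the main thread while towers 1..nr_tower-1 run in `LoopThread`s, so bumping the counter in both places makes `async_global_step` count steps across all towers.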
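`LoopThread` comes from tensorpack's utils (pulled in by `from ..utils import *`) and is not shown in this diff. Purely as a hypothetical sketch of the pause/resume contract the trainer relies on (call a function in a loop, block while paused), assuming an Event-based implementation; the real class may differ:

import threading

class LoopThread(threading.Thread):
    """Hypothetical sketch: call `func` repeatedly, blocking while paused."""
    def __init__(self, func):
        super(LoopThread, self).__init__()
        self.daemon = True
        self._func = func
        self._running = threading.Event()    # cleared <=> paused

    def run(self):
        while True:
            self._running.wait()             # block here while paused
            self._func()                     # one iteration of the body

    def pause(self):
        # takes effect once the current iteration finishes
        self._running.clear()

    def resume(self):
        self._running.set()

Under this assumption the diff's usage reads naturally: `th.pause(); th.start()` launches each tower thread in the blocked state, `_trigger_epoch` pauses all towers before reading the counter, and `run_step` resumes them on the first step of the next epoch.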