Commit 238603b7 authored Oct 13, 2018 by Yuxin Wu
Don't let NewSessionCreator finalize graph

parent 4f529aed
Showing 3 changed files with 18 additions and 10 deletions

    tensorpack/tfutils/sesscreate.py   +12 -8
    tensorpack/train/base.py           +1  -1
    tensorpack/train/trainers.py       +5  -1
tensorpack/tfutils/sesscreate.py

@@ -10,24 +10,25 @@ __all__ = ['NewSessionCreator', 'ReuseSessionCreator', 'SessionCreatorAdapter']
 
 """
 A SessionCreator should:
-    (optionally) finalize the graph
     create the session
     initialize all variables
     return a session that is ready to use
+    not finalize the graph
 """
 
 
-class NewSessionCreator(tf.train.ChiefSessionCreator):
-    def __init__(self, target='', graph=None, config=None):
+class NewSessionCreator(tf.train.SessionCreator):
+    def __init__(self, target='', config=None):
         """
         Args:
-            target, graph, config: same as :meth:`Session.__init__()`.
+            target, config: same as :meth:`Session.__init__()`.
             config: a :class:`tf.ConfigProto` instance, defaults to :func:`tfutils.get_default_sess_config()`
         """
-        assert graph is None
+        self.config = config
+        self.target = target
 
         if config is None:
-            # distributd trainer doesn't support user-provided config
+            # distributed trainer doesn't support user-provided config
             # we set this attribute so that they can check
             self.user_provided_config = False
             config = get_default_sess_config()
@@ -37,8 +38,11 @@ class NewSessionCreator(tf.train.ChiefSessionCreator):
                 "User-provided custom session config may not work due to TF \
 bugs. See https://github.com/tensorpack/tensorpack/issues/497 for workarounds.")
-            self.config = config
 
-        super(NewSessionCreator, self).__init__(master=target, config=config)
+    def create_session(self):
+        sess = tf.Session(target=self.target, config=self.config)
+        sess.run(tf.global_variables_initializer())
+        sess.run(tf.local_variables_initializer())
+        return sess
 
 
 class ReuseSessionCreator(tf.train.SessionCreator):
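For readers skimming the diff, here is a minimal standalone sketch of the new contract, not part of the commit: MinimalNewSessionCreator is a hypothetical stand-in and TF 1.x APIs are assumed. The creator only builds and initializes a session and leaves graph.finalize() to the caller, so ops can still be added after the session exists.

import tensorflow as tf  # assumes TF 1.x, as tensorpack targeted at the time


class MinimalNewSessionCreator(tf.train.SessionCreator):  # hypothetical stand-in mirroring the diff
    def __init__(self, target='', config=None):
        self.target = target
        self.config = config

    def create_session(self):
        # Build and initialize a session, but do NOT finalize the graph.
        sess = tf.Session(target=self.target, config=self.config)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        return sess


if __name__ == '__main__':
    v = tf.get_variable('v', shape=[], initializer=tf.zeros_initializer())
    sess = MinimalNewSessionCreator().create_session()
    assert not sess.graph.finalized   # graph is still mutable at this point
    sess.graph.finalize()             # finalization is now the caller's job
    print(sess.run(v))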
tensorpack/train/base.py

@@ -214,7 +214,7 @@ class Trainer(object):
         if not isinstance(session_init, JustCurrentSession):
             logger.warn("This is not a chief worker, 'session_init' was ignored!")
 
-        self.sess.graph.finalize()   # possibly already finalized by ChiefSessionCreator
+        self.sess.graph.finalize()
         logger.info("Graph Finalized.")
 
     @call_only_once
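As a hedged aside (plain TF 1.x, not tensorpack code), this is the behavior the trainer now relies on when it calls graph.finalize() itself rather than leaving it to ChiefSessionCreator: once finalized, any attempt to add ops raises.

import tensorflow as tf  # assumes TF 1.x

g = tf.Graph()
with g.as_default():
    tf.constant(1, name='added_before_finalize')  # fine, graph is still mutable

g.finalize()

try:
    with g.as_default():
        tf.constant(2, name='added_after_finalize')  # raises
except RuntimeError as err:
    print('Graph is finalized:', err)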
tensorpack/train/trainers.py

@@ -404,11 +404,12 @@ class HorovodTrainer(SingleCostTrainer):
     @HIDE_DOC
     def initialize(self, session_creator, session_init):
         # broadcast_op should be the last setup_graph: it needs to be created
-        # "right before" the session is initialized,
+        # "right before" the graph is finalized,
         # because it needs to capture all the variables (which may be created by callbacks).
         with tf.name_scope('horovod_broadcast'):
             self._broadcast_op = hvd.broadcast_global_variables(0)
 
+        # it's important that our NewSessionCreator does not finalize the graph
         if not isinstance(session_creator, NewSessionCreator):
             raise ValueError(
                 "session_creator has to be `NewSessionCreator` for horovod training! ")
@@ -423,6 +424,9 @@ class HorovodTrainer(SingleCostTrainer):
         # This broadcast belongs to the "intialize" stage
         # It should not be delayed to the "before_train" stage.
+        # TODO:
+        # 1. a allgather helper to concat strings
+        # 2. check variables on each rank match each other, print warnings, and broadcast the common set.
         logger.info("Broadcasting initialized variables ...")
         self.sess.run(self._broadcast_op)
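To make the ordering constraint in the comments above concrete, here is a hedged sketch outside tensorpack (assumes horovod.tensorflow and TF 1.x are installed and the script is launched with mpirun/horovodrun; variable names are illustrative): the broadcast op must be built while the graph is still mutable, and only afterwards is the session created and the graph finalized.

import tensorflow as tf
import horovod.tensorflow as hvd

hvd.init()
w = tf.get_variable('w', shape=[10], initializer=tf.zeros_initializer())

# 1. Build the broadcast op while the graph is still mutable, so it captures
#    every variable created so far (including ones added by callbacks).
broadcast_op = hvd.broadcast_global_variables(0)

# 2. Only now create and initialize the session, then finalize the graph.
#    A session creator that finalized the graph earlier would make step 1 impossible.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
sess.graph.finalize()
sess.run(broadcast_op)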