Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
seminar-breakout
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shashank Suhas
seminar-breakout
Commits
23c643fb
Commit
23c643fb
authored
Jun 04, 2017
by
Yuxin Wu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
remove supervisor and use sessionmanager
parent
a53da5ab
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
22 additions
and
21 deletions
+22
-21
tensorpack/models/common.py
tensorpack/models/common.py
+1
-1
tensorpack/tfutils/summary.py
tensorpack/tfutils/summary.py
+2
-1
tensorpack/train/distributed.py
tensorpack/train/distributed.py
+19
-19
No files found.
tensorpack/models/common.py
View file @
23c643fb
...
...
@@ -136,7 +136,7 @@ def layer_register(
# log shape info and add activation
logger
.
info
(
"{} output: {}"
.
format
(
scope
.
name
,
get_shape_str
(
outputs
)))
_LAYER_LOGGED
.
add
(
scope
.
name
)
_LAYER_LOGGED
.
add
(
scope
_
name
)
else
:
# run the actual function
outputs
=
func
(
*
args
,
**
actual_args
)
...
...
tensorpack/tfutils/summary.py
View file @
23c643fb
...
...
@@ -154,7 +154,8 @@ def add_moving_summary(v, *args, **kwargs):
for
x
in
v
:
assert
isinstance
(
x
,
tf
.
Tensor
),
x
assert
x
.
get_shape
()
.
ndims
==
0
,
x
.
get_shape
()
# TODO will produce tower0/xxx?
# TODO will produce variable tower0/xxx?
# TODO not saved under distributed
# TODO use zero_debias
gs
=
get_global_step_var
()
with
tf
.
name_scope
(
None
),
tf
.
device
(
gs
.
device
):
...
...
tensorpack/train/distributed.py
View file @
23c643fb
...
...
@@ -215,26 +215,26 @@ class DistributedReplicatedTrainer(SingleCostFeedfreeTrainer):
or
self
.
config
.
session_config
is
not
None
:
raise
ValueError
(
"Cannot set session_creator or session_config for distributed training! "
"To use a custom session config, pass it to the tf.train.Server constructor."
)
# TODO use scaffold + monitored session
class
SupervisedSessionCreator
(
tf
.
train
.
SessionCreator
):
def
__init__
(
self
,
is_chief
,
target
):
self
.
is_chief
=
is_chief
self
.
target
=
target
"To use a custom session config, pass it with tf.train.Server."
)
init_op
=
tf
.
global_variables_initializer
()
local_init_op
=
tf
.
local_variables_initializer
()
ready_op
=
tf
.
report_uninitialized_variables
()
sm
=
tf
.
train
.
SessionManager
(
local_init_op
=
local_init_op
,
ready_op
=
ready_op
,
graph
=
tf
.
get_default_graph
())
def
_create_session
():
if
self
.
is_chief
:
return
sm
.
prepare_session
(
master
=
self
.
server
.
target
,
init_op
=
init_op
)
else
:
return
sm
.
wait_for_session
(
master
=
self
.
server
.
target
)
class
_Creator
(
tf
.
train
.
SessionCreator
):
def
create_session
(
self
):
# supervisor will finalize the graph..
self
.
sv
=
tf
.
train
.
Supervisor
(
is_chief
=
self
.
is_chief
,
logdir
=
None
,
saver
=
None
,
global_step
=
get_global_step_var
(),
summary_op
=
None
,
save_model_secs
=
0
,
summary_writer
=
None
)
return
self
.
sv
.
prepare_or_wait_for_session
(
master
=
self
.
target
,
start_standard_services
=
False
)
self
.
config
.
session_creator
=
SupervisedSessionCreator
(
self
.
is_chief
,
self
.
server
.
target
)
return
_create_session
()
self
.
config
.
session_creator
=
_Creator
()
def
add_sync_queues_and_barrier
(
self
,
name_prefix
,
enqueue_after_list
):
"""Adds ops to enqueue on all worker queues.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment