Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
seminar-breakout
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shashank Suhas
seminar-breakout
Commits
3356b8de
Commit
3356b8de
authored
Nov 27, 2018
by
Yuxin Wu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Check GPU availability from session
parent
f34f454e
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
14 additions
and
4 deletions
+14
-4
docs/conf.py
docs/conf.py
+1
-0
tensorpack/callbacks/prof.py
tensorpack/callbacks/prof.py
+5
-2
tensorpack/tfutils/common.py
tensorpack/tfutils/common.py
+8
-0
tensorpack/train/trainers.py
tensorpack/train/trainers.py
+0
-2
No files found.
docs/conf.py
View file @
3356b8de
...
@@ -48,6 +48,7 @@ except ImportError:
...
@@ -48,6 +48,7 @@ except ImportError:
for
mod_name
in
MOCK_MODULES
:
for
mod_name
in
MOCK_MODULES
:
sys
.
modules
[
mod_name
]
=
mock
.
Mock
(
name
=
mod_name
)
sys
.
modules
[
mod_name
]
=
mock
.
Mock
(
name
=
mod_name
)
sys
.
modules
[
'cv2'
]
.
__version__
=
'3.2.1'
# fake version
sys
.
modules
[
'cv2'
]
.
__version__
=
'3.2.1'
# fake version
sys
.
modules
[
'msgpack'
]
.
version
=
(
0
,
5
,
2
)
import
tensorpack
import
tensorpack
...
...
tensorpack/callbacks/prof.py
View file @
3356b8de
...
@@ -15,6 +15,7 @@ from ..utils import logger
...
@@ -15,6 +15,7 @@ from ..utils import logger
from
..utils.concurrency
import
ensure_proc_terminate
,
start_proc_mask_signal
from
..utils.concurrency
import
ensure_proc_terminate
,
start_proc_mask_signal
from
..utils.gpu
import
get_num_gpu
from
..utils.gpu
import
get_num_gpu
from
..utils.nvml
import
NVMLContext
from
..utils.nvml
import
NVMLContext
from
..tfutils.common
import
gpu_available_in_session
__all__
=
[
'GPUUtilizationTracker'
,
'GraphProfiler'
,
'PeakMemoryTracker'
]
__all__
=
[
'GPUUtilizationTracker'
,
'GraphProfiler'
,
'PeakMemoryTracker'
]
...
@@ -53,7 +54,7 @@ class GPUUtilizationTracker(Callback):
...
@@ -53,7 +54,7 @@ class GPUUtilizationTracker(Callback):
assert
len
(
self
.
_devices
),
"[GPUUtilizationTracker] No GPU device given!"
assert
len
(
self
.
_devices
),
"[GPUUtilizationTracker] No GPU device given!"
def
_before_train
(
self
):
def
_before_train
(
self
):
# assert tf.test.is_gpu_available()
assert
gpu_available_in_session
(),
"[GPUUtilizationTracker] needs GPU!"
self
.
_evt
=
mp
.
Event
()
self
.
_evt
=
mp
.
Event
()
self
.
_stop_evt
=
mp
.
Event
()
self
.
_stop_evt
=
mp
.
Event
()
self
.
_queue
=
mp
.
Queue
()
self
.
_queue
=
mp
.
Queue
()
...
@@ -212,8 +213,10 @@ class PeakMemoryTracker(Callback):
...
@@ -212,8 +213,10 @@ class PeakMemoryTracker(Callback):
ops
.
append
(
MaxBytesInUse
())
ops
.
append
(
MaxBytesInUse
())
self
.
_fetches
=
tf
.
train
.
SessionRunArgs
(
fetches
=
ops
)
self
.
_fetches
=
tf
.
train
.
SessionRunArgs
(
fetches
=
ops
)
def
_before_train
(
self
):
assert
gpu_available_in_session
(),
"PeakMemoryTracker only supports GPU!"
def
_before_run
(
self
,
_
):
def
_before_run
(
self
,
_
):
# assert tf.test.is_gpu_available(), "PeakMemoryTracker only supports GPU!"
if
self
.
local_step
==
self
.
trainer
.
steps_per_epoch
-
1
:
if
self
.
local_step
==
self
.
trainer
.
steps_per_epoch
-
1
:
return
self
.
_fetches
return
self
.
_fetches
return
None
return
None
...
...
tensorpack/tfutils/common.py
View file @
3356b8de
...
@@ -139,6 +139,14 @@ def get_op_or_tensor_by_name(name):
...
@@ -139,6 +139,14 @@ def get_op_or_tensor_by_name(name):
return
list
(
map
(
f
,
name
))
return
list
(
map
(
f
,
name
))
def gpu_available_in_session(sess=None):
    """
    Check whether a TensorFlow session can see at least one GPU device.

    Args:
        sess: the session to inspect. When omitted, the current default
            session (``tf.get_default_session()``) is used.

    Returns:
        bool: True if any device reported by ``sess.list_devices()`` has a
        ``device_type`` equal to "gpu" (compared case-insensitively),
        otherwise False.

    Raises:
        RuntimeError: if ``sess`` is not given and there is no default session.
    """
    if sess is None:
        sess = tf.get_default_session()
        if sess is None:
            # Without this guard the call below would fail with an opaque
            # "'NoneType' object has no attribute 'list_devices'".
            raise RuntimeError(
                "gpu_available_in_session() must be called with a session, "
                "or while a default session is active!")
    return any(dev.device_type.lower() == 'gpu' for dev in sess.list_devices())
@
deprecated
(
"You should use get_tf_version_tuple instead due to the existence of TF 1.10"
)
@
deprecated
(
"You should use get_tf_version_tuple instead due to the existence of TF 1.10"
)
def
get_tf_version_number
():
def
get_tf_version_number
():
return
float
(
'.'
.
join
(
tf
.
VERSION
.
split
(
'.'
)[:
2
]))
return
float
(
'.'
.
join
(
tf
.
VERSION
.
split
(
'.'
)[:
2
]))
...
...
tensorpack/train/trainers.py
View file @
3356b8de
...
@@ -430,8 +430,6 @@ class HorovodTrainer(SingleCostTrainer):
...
@@ -430,8 +430,6 @@ class HorovodTrainer(SingleCostTrainer):
except
AttributeError
:
# old horovod does not have local_size
except
AttributeError
:
# old horovod does not have local_size
pass
pass
super
(
HorovodTrainer
,
self
)
.
initialize
(
session_creator
,
session_init
)
super
(
HorovodTrainer
,
self
)
.
initialize
(
session_creator
,
session_init
)
# if not tf.test.is_gpu_available():
# logger.error("tf.test.is_gpu_available() == False")
# This broadcast belongs to the "initialize" stage
# This broadcast belongs to the "initialize" stage
# It should not be delayed to the "before_train" stage.
# It should not be delayed to the "before_train" stage.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment