Commit f831a46e authored by Yuxin Wu

Do not make unnecessary calls to NVML (#1134)

parent 6f02650d
...@@ -67,7 +67,7 @@ class GPUUtilizationTracker(Callback): ...@@ -67,7 +67,7 @@ class GPUUtilizationTracker(Callback):
self._evt.set() self._evt.set()
def _after_epoch(self): def _after_epoch(self):
while self._evt.is_set(): # unlikely while self._evt.is_set(): # unlikely, unless the epoch is extremely fast
pass pass
self._evt.set() self._evt.set()
...@@ -87,20 +87,21 @@ class GPUUtilizationTracker(Callback): ...@@ -87,20 +87,21 @@ class GPUUtilizationTracker(Callback):
self._proc.terminate() self._proc.terminate()
def worker(self, evt, rst_queue, stop_evt): def worker(self, evt, rst_queue, stop_evt):
while True: with NVMLContext() as ctx:
try: devices = [ctx.device(i) for i in self._devices]
evt.wait() # start epoch while True:
evt.clear() try:
if stop_evt.is_set(): # or on exit evt.wait() # start epoch
return evt.clear()
if stop_evt.is_set(): # or on exit
stats = np.zeros((len(self._devices),), dtype='f4') return
cnt = 0
with NVMLContext() as ctx: stats = np.zeros((len(self._devices),), dtype='f4')
cnt = 0
while True: while True:
time.sleep(1) time.sleep(1)
data = [ctx.device(i).utilization()['gpu'] for i in self._devices] data = [d.utilization()['gpu'] for d in devices]
data = list(map(float, data)) data = list(map(float, data))
stats += data stats += data
cnt += 1 cnt += 1
...@@ -115,10 +116,10 @@ class GPUUtilizationTracker(Callback): ...@@ -115,10 +116,10 @@ class GPUUtilizationTracker(Callback):
cnt -= 1 cnt -= 1
rst_queue.put(stats / cnt) rst_queue.put(stats / cnt)
break break
except Exception: except Exception:
logger.exception("Exception in GPUUtilizationTracker.worker") logger.exception("Exception in GPUUtilizationTracker.worker")
rst_queue.put(-1) rst_queue.put(-1)
return return
# Can add more features from tfprof # Can add more features from tfprof
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment