Commit d7a13cb7 authored by Yuxin Wu

Use experimental.list_physical_devices to avoid side effects

parent 88ce4a90
@@ -2,15 +2,16 @@
 # Performance Tuning
 
 __We do not know why your training is slow__
-(and most of the times it's not due to issues in tensorpack).
+(and most of the times it's not due to issues in tensorpack),
+unless we can reproduce the slowness with your instructions.
 
 Tensorpack is designed to be high-performance, as can be seen in the [benchmarks](https://github.com/tensorpack/benchmarks).
 But performance is different across machines and tasks,
 and it requires knowledge of the entire stack to understand what might be wrong.
-Therefore when you have a performance issue,
-it's not easy to let others understand what goes wrong without doing some investigations by your own.
+If you need help from others to understand a performance issue you saw, you have to either
+allow others to reproduce your slowness, or do some investigations on your own.
 
-Tensorpack has some tools to make it easier to understand the performance.
+Tensorpack has some tools to make it easier to investigate the performance.
 Here we provide a list of things you can do to understand why your training is slow.
 
 If you ask for help to understand and improve the speed, PLEASE do the
......
@@ -54,14 +54,32 @@ try:
     _version = tf.__version__.split('.')
     assert (int(_version[0]), int(_version[1])) >= (1, 3), "TF>=1.3 is required!"
     _HAS_TF = True
+except ImportError:
+    print("Failed to import tensorflow.")
+    _HAS_TF = False
+else:
+    # Install stacktrace handler
     try:
         from tensorflow.python.framework import test_util
         test_util.InstallStackTraceHandler()
     except Exception:
         pass
-except ImportError:
-    print("Failed to import tensorflow.")
-    _HAS_TF = False
+
+    # Monkey-patch tf.test.is_gpu_available to avoid side effects:
+    # https://github.com/tensorflow/tensorflow/issues/26460
+    try:
+        list_dev = tf.config.experimental.list_physical_devices
+    except AttributeError:
+        pass
+    else:
+        old_is_gpu_available = tf.test.is_gpu_available
+
+        def is_gpu_available(*args, **kwargs):
+            if len(args) == 0 and len(kwargs) == 0:
+                return len(list_dev('GPU')) > 0
+            return old_is_gpu_available(*args, **kwargs)
+
+        tf.test.is_gpu_available = is_gpu_available
 
 # These lines will be programatically read/write by setup.py
......
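The patch above wraps `tf.test.is_gpu_available` so that zero-argument calls are answered cheaply from `list_physical_devices` (which only enumerates devices), while calls with arguments still delegate to the original implementation. Here is a standalone sketch of the same wrap-and-delegate pattern; `FakeTF` and `list_dev` are illustrative stand-ins for the real `tf` module so the snippet runs without TensorFlow installed:

```python
class FakeTF:
    """Minimal stand-in for the tensorflow module."""
    calls = []

    @staticmethod
    def is_gpu_available(*args, **kwargs):
        # The real tf.test.is_gpu_available initializes a CUDA context
        # (the side effect the patch avoids); here we just record the call.
        FakeTF.calls.append((args, kwargs))
        return True


def list_dev(device_type):
    # Stand-in for tf.config.experimental.list_physical_devices, which
    # enumerates devices without initializing them.
    return ["/physical_device:GPU:0"] if device_type == "GPU" else []


old_is_gpu_available = FakeTF.is_gpu_available

def is_gpu_available(*args, **kwargs):
    # Zero-argument calls are answered cheaply from the device list.
    if len(args) == 0 and len(kwargs) == 0:
        return len(list_dev("GPU")) > 0
    # Calls with arguments keep the original behavior (and side effects).
    return old_is_gpu_available(*args, **kwargs)

FakeTF.is_gpu_available = is_gpu_available

print(FakeTF.is_gpu_available())      # cheap path: no call recorded
print(FakeTF.is_gpu_available(True))  # delegates to the original
```

Saving the old function before reassigning the attribute is what makes the delegation possible; patching the attribute in place means every existing caller of `tf.test.is_gpu_available` picks up the cheap path automatically.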
@@ -340,6 +340,7 @@ class HorovodTrainer(SingleCostTrainer):
     2. Due to a TF bug (#8136), you must not initialize CUDA context before the trainer starts training.
        Therefore TF functions like `is_gpu_available()` or `list_local_devices()`
        must be avoided.
+       You can, however, use `tf.config.experimental.list_physical_devices('GPU')`, introduced in TF 1.14.
 
     2. MPI does not like `fork()`. If your dataflow contains multiprocessing, it may cause problems.
......
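A sketch of a device check that should be safe to run before HorovodTrainer starts, assuming TF >= 1.14 as the docstring above notes: `list_physical_devices` only enumerates devices and does not create a CUDA context. The snippet is guarded so it also runs on machines without TensorFlow installed:

```python
import importlib.util

if importlib.util.find_spec("tensorflow") is not None:
    import tensorflow as tf
    # Safe before training: enumerates devices without initializing them.
    gpus = tf.config.experimental.list_physical_devices('GPU')  # TF >= 1.14
    print("GPUs visible:", len(gpus))
else:
    gpus = None
    print("tensorflow not installed; skipping device query")

# Avoid before the trainer starts (both initialize the CUDA context):
#   tf.test.is_gpu_available()
#   device_lib.list_local_devices()
```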
@@ -56,12 +56,19 @@ def get_num_gpu():
         return warn_return(ctx.num_devices(), "NVML found nvidia devices. ")
     except Exception:
         # Fallback
         logger.info("Loading local devices by TensorFlow ...")
-        # Note this will initialize all GPUs and therefore has side effect
-        # https://github.com/tensorflow/tensorflow/issues/8136
-        from tensorflow.python.client import device_lib
-        local_device_protos = device_lib.list_local_devices()
-        return len([x.name for x in local_device_protos if x.device_type == 'GPU'])
+
+        try:
+            import tensorflow as tf
+            # available since TF 1.14
+            gpu_devices = tf.config.experimental.list_physical_devices('GPU')
+        except AttributeError:
+            from tensorflow.python.client import device_lib
+            local_device_protos = device_lib.list_local_devices()
+            # Note this will initialize all GPUs and therefore has side effect
+            # https://github.com/tensorflow/tensorflow/issues/8136
+            gpu_devices = [x.name for x in local_device_protos if x.device_type == 'GPU']
+        return len(gpu_devices)
 
 
 get_nr_gpu = get_num_gpu
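The new `get_num_gpu` fallback prefers the side-effect-free `list_physical_devices` and only drops down to `list_local_devices` (which initializes every GPU) when the attribute is missing. A minimal sketch of that feature-detection chain, exercised with stand-in modules so it runs without TensorFlow; `count_gpus`, `new_tf`, and `old_tf` are illustrative names, not tensorpack APIs:

```python
from types import SimpleNamespace

def count_gpus(tf_module):
    """Prefer the side-effect-free device list; fall back to the old API."""
    try:
        list_dev = tf_module.config.experimental.list_physical_devices
    except AttributeError:
        # Old TF: list_local_devices initializes all GPUs (TF issue #8136).
        protos = tf_module.device_lib.list_local_devices()
        return len([x for x in protos if x.device_type == 'GPU'])
    return len(list_dev('GPU'))

# Stand-in for TF >= 1.14: list_physical_devices is available.
new_tf = SimpleNamespace(config=SimpleNamespace(experimental=SimpleNamespace(
    list_physical_devices=lambda kind: ['GPU:0', 'GPU:1'] if kind == 'GPU' else [])))

# Stand-in for older TF: only list_local_devices is available.
old_tf = SimpleNamespace(
    config=SimpleNamespace(experimental=SimpleNamespace()),
    device_lib=SimpleNamespace(list_local_devices=lambda: [
        SimpleNamespace(device_type='GPU'), SimpleNamespace(device_type='CPU')]))

print(count_gpus(new_tf))  # counts via list_physical_devices
print(count_gpus(old_tf))  # counts via the legacy fallback
```

Catching `AttributeError` rather than checking the TF version string keeps the detection robust: it keys on the capability itself, which is the same approach the patched `libinfo.py` uses for the monkey-patch.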