Commit 2b4ec72e authored by Yuxin Wu

improve find_library

parent 25bf4336
...@@ -5,14 +5,12 @@ from collections import defaultdict ...@@ -5,14 +5,12 @@ from collections import defaultdict
from six.moves import map from six.moves import map
from tabulate import tabulate from tabulate import tabulate
import os import os
import re
import sys import sys
import tensorflow as tf import tensorflow as tf
from ..compat import tfv1 from ..compat import tfv1
from ..utils.argtools import graph_memoized from ..utils.argtools import graph_memoized
from ..utils.concurrency import subproc_call from ..utils.utils import find_library_full_path as find_library
from ..utils import change_env
from ..utils.nvml import NVMLContext from ..utils.nvml import NVMLContext
from ..libinfo import __git_version__ from ..libinfo import __git_version__
...@@ -174,7 +172,8 @@ def collect_env_info(): ...@@ -174,7 +172,8 @@ def collect_env_info():
data.append(("Tensorpack", __git_version__)) data.append(("Tensorpack", __git_version__))
data.append(("TensorFlow", tfv1.VERSION + "/" + tfv1.GIT_VERSION)) data.append(("TensorFlow", tfv1.VERSION + "/" + tfv1.GIT_VERSION))
data.append(("TF Compiler Version", tfv1.COMPILER_VERSION)) data.append(("TF Compiler Version", tfv1.COMPILER_VERSION))
data.append(("TF CUDA support", tf.test.is_built_with_cuda())) has_cuda = tf.test.is_built_with_cuda()
data.append(("TF CUDA support", has_cuda))
try: try:
from tensorflow.python.framework import test_util from tensorflow.python.framework import test_util
...@@ -188,49 +187,27 @@ def collect_env_info(): ...@@ -188,49 +187,27 @@ def collect_env_info():
except Exception: except Exception:
pass pass
def find_library_with_ldconfig(ldconfig, lib): if has_cuda:
# Read sonames from ldconfig: may not be accurate data.append(("Nvidia Driver", find_library("nvidia-ml")))
# similar to from ctypes.util import find_library, but with full path data.append(("CUDA", find_library("cudart")))
expr = r'\s+(lib%s\.[^\s]+)\s+\(.*=>\s+(.*)' % (re.escape(lib)) data.append(("CUDNN", find_library("cudnn")))
res = re.search(expr, ldconfig) data.append(("NCCL", find_library("nccl")))
if not res:
return None # List devices with NVML
else: data.append(
ret = res.group(2) ("CUDA_VISIBLE_DEVICES",
return os.path.realpath(ret) os.environ.get("CUDA_VISIBLE_DEVICES", str(None))))
try:
try: devs = defaultdict(list)
with change_env('LC_ALL', 'C'), change_env('LANG', 'C'): with NVMLContext() as ctx:
ldconfig, ret = subproc_call("ldconfig -p") for idx, dev in enumerate(ctx.devices()):
assert ret == 0 devs[dev.name()].append(str(idx))
ldconfig = ldconfig.decode('utf-8')
for devname, devids in devs.items():
def find_library(x): data.append(
return find_library_with_ldconfig(ldconfig, x) ("GPU " + ",".join(devids), devname))
except Exception:
except Exception: data.append(("GPU", "Not found with NVML"))
from ctypes.util import find_library
data.append(("CUDA", find_library("cudart")))
data.append(("CUDNN", find_library("cudnn")))
data.append(("NCCL", find_library("nccl")))
# List devices with NVML
data.append(
("CUDA_VISIBLE_DEVICES",
os.environ.get("CUDA_VISIBLE_DEVICES", str(None))))
try:
devs = defaultdict(list)
with NVMLContext() as ctx:
for idx, dev in enumerate(ctx.devices()):
devs[dev.name()].append(str(idx))
for devname, devids in devs.items():
data.append(
("GPU " + ",".join(devids) + " Model",
devname))
except Exception:
pass
# Other important dependencies # Other important dependencies
try: try:
......
...@@ -238,7 +238,7 @@ def start_proc_mask_signal(proc): ...@@ -238,7 +238,7 @@ def start_proc_mask_signal(proc):
def subproc_call(cmd, timeout=None): def subproc_call(cmd, timeout=None):
""" """
Execute a command with timeout, and return both STDOUT/STDERR. Execute a command with timeout, and return STDOUT and STDERR
Args: Args:
cmd(str): the command to execute. cmd(str): the command to execute.
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
import inspect import inspect
import numpy as np import numpy as np
import re
import os import os
import sys import sys
from contextlib import contextmanager from contextlib import contextmanager
...@@ -11,6 +12,7 @@ from datetime import datetime, timedelta ...@@ -11,6 +12,7 @@ from datetime import datetime, timedelta
from tqdm import tqdm from tqdm import tqdm
from . import logger from . import logger
from .concurrency import subproc_call
__all__ = ['change_env', __all__ = ['change_env',
'get_rng', 'get_rng',
...@@ -216,3 +218,76 @@ def get_tqdm(*args, **kwargs): ...@@ -216,3 +218,76 @@ def get_tqdm(*args, **kwargs):
""" Similar to :func:`tqdm.tqdm()`, """ Similar to :func:`tqdm.tqdm()`,
but use tensorpack's default options to have consistent style. """ but use tensorpack's default options to have consistent style. """
return tqdm(*args, **get_tqdm_kwargs(**kwargs)) return tqdm(*args, **get_tqdm_kwargs(**kwargs))
def find_library_full_path(name):
    """
    Similar to `from ctypes.util import find_library`, but try
    to return full path if possible.

    Args:
        name(str): the library name without the "lib" prefix and without
            the file suffix, e.g. "cudart" for "libcudart.so".

    Returns:
        str or None: on Linux, the resolved (``os.path.realpath``) path of the
        first match found by, in order: the libraries mapped into the current
        process, ``ld``, ``ldconfig -p``; otherwise whatever
        ``ctypes.util.find_library`` returns (which may be None).
    """
    from ctypes.util import find_library

    if os.name == "posix" and sys.platform == "darwin":
        # on Mac, ctypes already returns full path
        return find_library(name)

    def _use_proc_maps(name):
        """
        Find so from /proc/pid/maps
        Only works with libraries that has already been loaded.
        But this is the most accurate method -- it finds the exact library that's being used.
        """
        procmap = os.path.join('/proc', str(os.getpid()), 'maps')
        if not os.path.isfile(procmap):
            return None
        target = 'lib' + name + '.so'
        with open(procmap, 'r') as f:
            for line in f:
                # The last space-separated field of a maps line is the mapped file (if any).
                sofile = line.strip().split(' ')[-1]
                if target in os.path.basename(sofile) and os.path.isfile(sofile):
                    return os.path.realpath(sofile)
        return None

    # The following two methods come from https://github.com/python/cpython/blob/master/Lib/ctypes/util.py
    def _use_ld(name):
        """
        Find so with `ld -lname -Lpath`.
        It will search for files in LD_LIBRARY_PATH, but not in ldconfig.
        """
        try:
            from shlex import quote
        except ImportError:     # Python 2
            from pipes import quote

        # The command is handed to subproc_call as a single string, so quote every
        # interpolated piece: directories may contain spaces or shell metacharacters.
        cmd = "ld -t -l{} -o {}".format(quote(name), quote(os.devnull))
        ld_lib_path = os.environ.get('LD_LIBRARY_PATH', '')
        for d in ld_lib_path.split(':'):
            if not d:
                # An unset variable or a stray ':' yields an empty entry; emitting a
                # bare "-L" would leave the option without its argument.
                continue
            cmd = cmd + " -L " + quote(d)
        # `|| true`: ld is expected to fail at linking; only its trace output matters.
        result, ret = subproc_call(cmd + ' || true')
        expr = r'[^\(\)\s]*lib%s\.[^\(\)\s]*' % re.escape(name)
        res = re.search(expr, result.decode('utf-8'))
        if not res:
            return None
        path = res.group(0)
        if not os.path.isfile(path):
            return None
        return os.path.realpath(path)

    def _use_ldconfig(name):
        """
        Find so in `ldconfig -p`.
        It does not handle LD_LIBRARY_PATH.
        """
        # Force the C locale so the output format matched by the regex below is stable.
        with change_env('LC_ALL', 'C'), change_env('LANG', 'C'):
            ldconfig, ret = subproc_call("ldconfig -p")
            if ret != 0:
                return None
            ldconfig = ldconfig.decode('utf-8')

        expr = r'\s+(lib%s\.[^\s]+)\s+\(.*=>\s+(.*)' % (re.escape(name))
        res = re.search(expr, ldconfig)
        if not res:
            return None
        # group(2) is the path on the right-hand side of "=>".
        return os.path.realpath(res.group(2))

    if sys.platform.startswith('linux'):
        return _use_proc_maps(name) or _use_ld(name) or _use_ldconfig(name) or find_library(name)

    return find_library(name)  # don't know what to do
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment