Commit adc9409f authored by Yuxin Wu

update serializer; update tests

parent 78762b38
@@ -52,6 +52,14 @@ Note that the above methods only prevent variables being updated by SGD.
 Some variables may be updated by other means,
 e.g., BatchNorm statistics are updated through the `UPDATE_OPS` collection and the [RunUpdateOps](../modules/callbacks.html#tensorpack.callbacks.RunUpdateOps) callback.
 
+## The model does not run on CPUs?
+
+Some TensorFlow ops are not implemented on CPUs.
+For example, many ops in the NCHW data format have no CPU kernel,
+although an MKL-enabled build of TensorFlow supports more NCHW ops on CPUs.
+In general, you need to implement the model in a way that your version of TensorFlow supports.
+
 ## My training seems slow. Why?
 Check out the [Performance Tuning tutorial](performance-tuning.html)
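The usual workaround is to keep CPU graphs in NHWC, transposing around any NCHW-only op. A minimal sketch, not part of this commit; the helper name and the `on_gpu` flag are invented for illustration:

```python
import tensorflow as tf

def max_pool_cpu_compatible(x, ksize=2, stride=2, on_gpu=False):
    """x: an NCHW tensor. On CPU, transpose to NHWC, where kernels exist."""
    if on_gpu:
        return tf.nn.max_pool(x, [1, 1, ksize, ksize],
                              [1, 1, stride, stride], 'SAME',
                              data_format='NCHW')
    x = tf.transpose(x, [0, 2, 3, 1])     # NCHW -> NHWC
    x = tf.nn.max_pool(x, [1, ksize, ksize, 1],
                       [1, stride, stride, 1], 'SAME')
    return tf.transpose(x, [0, 3, 1, 2])  # NHWC -> NCHW
```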
@@ -66,7 +66,7 @@ setup(
     ],
     tests_require=['flake8', 'scikit-image'],
     extras_require={
-        'all': ['pillow', 'scipy', 'h5py', 'lmdb>=0.92', 'matplotlib', 'scikit-learn'],
+        'all': ['scipy', 'h5py', 'lmdb>=0.92', 'matplotlib', 'scikit-learn'],
         'all: "linux" in sys_platform': ['python-prctl'],
         'all: python_version < "3.0"': ['tornado'],
     },
@@ -43,7 +43,8 @@ for _, module_name, __ in iter_modules(
     srcpath = os.path.join(_CURR_DIR, module_name + '.py')
     if not os.path.isfile(srcpath):
         continue
-    if not module_name.startswith('_') and \
+    if "_test" not in module_name and \
+            not module_name.startswith('_') and \
             module_name not in __SKIP:
         _global_import(module_name)
@@ -48,5 +48,5 @@ else:
     srcpath = os.path.join(_CURR_DIR, module_name + '.py')
     if not os.path.isfile(srcpath):
         continue
-    if not module_name.startswith('_'):
+    if not module_name.startswith('_') and "_test" not in module_name:
         global_import(module_name)
@@ -44,5 +44,7 @@ for _, module_name, _ in iter_modules(
         continue
     if module_name.startswith('_'):
         continue
+    if "_test" in module_name:
+        continue
     if module_name not in _SKIP:
         _global_import(module_name)
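The three hunks above apply the same rule to different packages: modules whose name contains `_test` are excluded from auto-import. The `_global_import` helper itself does not appear in this diff; a minimal sketch of what such a helper typically does, assuming it re-exports a submodule's public names into the package namespace:

```python
import importlib

def _global_import(name):
    # Import a sibling module and lift its public names into the package,
    # so e.g. `tensorpack.models.Conv2D` works without importing the submodule.
    p = importlib.import_module('.' + name, __package__)
    lst = p.__all__ if '__all__' in dir(p) else dir(p)
    for k in lst:
        if not k.startswith('_'):
            globals()[k] = getattr(p, k)
```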
@@ -5,6 +5,9 @@
 import logging
 import unittest
 
 import tensorflow as tf
+import numpy as np
+
+from .pool import FixedUnPooling
 
 
 class TestModel(unittest.TestCase):
@@ -24,6 +27,47 @@ class TestModel(unittest.TestCase):
         return tf.Variable(args[0])
 
+
+class TestPool(TestModel):
+    def test_FixedUnPooling(self):
+        h, w = 3, 4
+        scale = 2
+        mat = np.random.rand(h, w, 3).astype('float32')
+        inp = self.make_variable(mat)
+        inp = tf.reshape(inp, [1, h, w, 3])
+        output = FixedUnPooling('unpool', inp, scale)
+        res = self.run_variable(output)
+        self.assertEqual(res.shape, (1, scale * h, scale * w, 3))
+
+        # mat is on corner
+        ele = res[0, ::scale, ::scale, 0]
+        self.assertTrue((ele == mat[:, :, 0]).all())
+
+        # the rest are zeros
+        res[0, ::scale, ::scale, :] = 0
+        self.assertTrue((res == 0).all())
+
+    # Below was originally for the BilinearUpSample layer used in the HED example
+    # def test_BilinearUpSample(self):
+    #     h, w = 12, 12
+    #     scale = 2
+    #
+    #     mat = np.random.rand(h, w).astype('float32')
+    #     inp = self.make_variable(mat)
+    #     inp = tf.reshape(inp, [1, h, w, 1])
+    #
+    #     output = BilinearUpSample(inp, scale)
+    #     res = self.run_variable(output)[0, :, :, 0]
+    #
+    #     from skimage.transform import rescale
+    #     res2 = rescale(mat, scale, mode='edge')
+    #
+    #     diff = np.abs(res2 - res)
+    #
+    #     self.assertTrue(diff.max() < 1e-4, diff.max())
+
+
 def run_test_case(case):
     suite = unittest.TestLoader().loadTestsFromTestCase(case)
     unittest.TextTestRunner(verbosity=2).run(suite)
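For reference, the property this test pins down can be stated directly in numpy: with the default unpool mat, `FixedUnPooling` places each input value at the top-left corner of its `scale x scale` block and zero-fills the rest, i.e. a per-channel Kronecker product with a corner-one matrix. A sketch, not from the commit; `fixed_unpool_ref` is a made-up name:

```python
import numpy as np

def fixed_unpool_ref(mat, scale):
    """mat: HxWxC array -> (scale*H, scale*W, C), corner-placement unpooling."""
    corner = np.zeros((scale, scale), dtype=mat.dtype)
    corner[0, 0] = 1  # default unpool mat: 1 at the top-left corner
    return np.stack([np.kron(mat[:, :, c], corner)
                     for c in range(mat.shape[-1])], axis=-1)

mat = np.random.rand(3, 4, 3).astype('float32')
out = fixed_unpool_ref(mat, 2)
assert out.shape == (6, 8, 3)
assert (out[::2, ::2] == mat).all()   # inputs sit on block corners
out[::2, ::2] = 0
assert (out == 0).all()               # everything else is zero
```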
@@ -5,7 +5,6 @@ import numpy as np
 
 from ..compat import tfv1 as tf  # this should be avoided first in model code
 from ..utils.argtools import get_data_format, shape2d
-from ._test import TestModel
 from .common import layer_register
 from .shape_utils import StaticDynamicShape
 from .tflayer import convert_to_tflayer_args
@@ -137,44 +136,3 @@ def FixedUnPooling(x, shape, unpool_mat=None, data_format='channels_last'):
     ret.set_shape(tf.TensorShape(output_shape.get_static()))
     return ret
-
-
-class TestPool(TestModel):
-    def test_FixedUnPooling(self):
-        h, w = 3, 4
-        scale = 2
-        mat = np.random.rand(h, w, 3).astype('float32')
-        inp = self.make_variable(mat)
-        inp = tf.reshape(inp, [1, h, w, 3])
-        output = FixedUnPooling('unpool', inp, scale)
-        res = self.run_variable(output)
-        self.assertEqual(res.shape, (1, scale * h, scale * w, 3))
-
-        # mat is on corner
-        ele = res[0, ::scale, ::scale, 0]
-        self.assertTrue((ele == mat[:, :, 0]).all())
-
-        # the rest are zeros
-        res[0, ::scale, ::scale, :] = 0
-        self.assertTrue((res == 0).all())
-
-    # Below was originally for the BilinearUpSample layer used in the HED example
-    # def test_BilinearUpSample(self):
-    #     h, w = 12, 12
-    #     scale = 2
-    #
-    #     mat = np.random.rand(h, w).astype('float32')
-    #     inp = self.make_variable(mat)
-    #     inp = tf.reshape(inp, [1, h, w, 1])
-    #
-    #     output = BilinearUpSample(inp, scale)
-    #     res = self.run_variable(output)[0, :, :, 0]
-    #
-    #     from skimage.transform import rescale
-    #     res2 = rescale(mat, scale, mode='edge')
-    #
-    #     diff = np.abs(res2 - res)
-    #
-    #     self.assertTrue(diff.max() < 1e-4, diff.max())
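With the test moved out, the module now contains only the layer itself. A usage sketch based on the signature above (assumptions: `FixedUnPooling` is importable from `tensorpack.models`, `unpool_mat` accepts a 2D numpy array of size `shape`, and a TF1-style graph is being built):

```python
import numpy as np
import tensorflow as tf
from tensorpack.models import FixedUnPooling

x = tf.placeholder(tf.float32, [None, 3, 4, 16])   # NHWC (channels_last)
# A custom unpool mat that copies each value over its whole 2x2 block,
# instead of the default corner placement:
y = FixedUnPooling('unpool', x, 2, unpool_mat=np.ones((2, 2), dtype='float32'))
print(y.shape)   # (?, 6, 8, 16)
```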
@@ -2,14 +2,11 @@
 # File: serialize.py
 
 import os
-import sys
+import pickle
 
 import msgpack
 import msgpack_numpy
 
-from . import logger
-from .develop import create_dummy_func
-
 msgpack_numpy.patch()
 assert msgpack.version >= (0, 5, 2)
 
@@ -19,83 +16,90 @@ __all__ = ['loads', 'dumps']
 
 MAX_MSGPACK_LEN = 1000000000
 
-def dumps_msgpack(obj):
-    """
-    Serialize an object.
-
-    Returns:
-        Implementation-dependent bytes-like object.
-    """
-    return msgpack.dumps(obj, use_bin_type=True)
-
-
-def loads_msgpack(buf):
-    """
-    Args:
-        buf: the output of `dumps`.
-    """
-    # Since 0.6, the default max size was set to 1MB.
-    # We change it to approximately 1G.
-    return msgpack.loads(buf, raw=False,
-                         max_bin_len=MAX_MSGPACK_LEN,
-                         max_array_len=MAX_MSGPACK_LEN,
-                         max_map_len=MAX_MSGPACK_LEN,
-                         max_str_len=MAX_MSGPACK_LEN)
-
-
-def dumps_pyarrow(obj):
-    """
-    Serialize an object.
-
-    Returns:
-        Implementation-dependent bytes-like object.
-        May not be compatible across different versions of pyarrow.
-    """
-    return pa.serialize(obj).to_buffer()
-
-
-def loads_pyarrow(buf):
-    """
-    Args:
-        buf: the output of `dumps`.
-    """
-    return pa.deserialize(buf)
-
-
-# import pyarrow has a lot of side effect:
-# https://github.com/apache/arrow/pull/2329
-# https://groups.google.com/a/tensorflow.org/forum/#!topic/developers/TMqRaT-H2bI
-# So we use msgpack as default.
-if os.environ.get('TENSORPACK_SERIALIZE', 'msgpack') == 'pyarrow':
-    try:
-        import pyarrow as pa
-    except ImportError:
-        loads_pyarrow = create_dummy_func('loads_pyarrow', ['pyarrow'])  # noqa
-        dumps_pyarrow = create_dummy_func('dumps_pyarrow', ['pyarrow'])  # noqa
-
-    if 'horovod' in sys.modules:
-        logger.warn("Horovod and pyarrow may have symbol conflicts. "
-                    "Uninstall pyarrow and use msgpack instead.")
-    loads = loads_pyarrow
-    dumps = dumps_pyarrow
+class MsgpackSerializer(object):
+
+    @staticmethod
+    def dumps(obj):
+        """
+        Serialize an object.
+
+        Returns:
+            Implementation-dependent bytes-like object.
+        """
+        return msgpack.dumps(obj, use_bin_type=True)
+
+    @staticmethod
+    def loads(buf):
+        """
+        Args:
+            buf: the output of `dumps`.
+        """
+        # Since 0.6, the default max size was set to 1MB.
+        # We change it to approximately 1G.
+        return msgpack.loads(buf, raw=False,
+                             max_bin_len=MAX_MSGPACK_LEN,
+                             max_array_len=MAX_MSGPACK_LEN,
+                             max_map_len=MAX_MSGPACK_LEN,
+                             max_str_len=MAX_MSGPACK_LEN)
+
+
+class PyarrowSerializer(object):
+    @staticmethod
+    def dumps(obj):
+        """
+        Serialize an object.
+
+        Returns:
+            Implementation-dependent bytes-like object.
+            May not be compatible across different versions of pyarrow.
+        """
+        import pyarrow as pa
+        return pa.serialize(obj).to_buffer()
+
+    @staticmethod
+    def dumps_bytes(obj):
+        """
+        Returns:
+            bytes
+        """
+        return PyarrowSerializer.dumps(obj).to_pybytes()
+
+    @staticmethod
+    def loads(buf):
+        """
+        Args:
+            buf: the output of `dumps` or `dumps_bytes`.
+        """
+        import pyarrow as pa
+        return pa.deserialize(buf)
+
+
+class PickleSerializer(object):
+    @staticmethod
+    def dumps(obj):
+        """
+        Returns:
+            bytes
+        """
+        return pickle.dumps(obj, protocol=-1)
+
+    @staticmethod
+    def loads(buf):
+        """
+        Args:
+            buf: bytes, the output of `dumps`.
+        """
+        return pickle.loads(buf)
+
+
+_DEFAULT_S = os.environ.get('TENSORPACK_SERIALIZE', 'msgpack')
+
+if _DEFAULT_S == "pyarrow":
+    dumps = PyarrowSerializer.dumps_bytes
+    loads = PyarrowSerializer.loads
+elif _DEFAULT_S == "pickle":
+    dumps = PickleSerializer.dumps
+    loads = PickleSerializer.loads
 else:
-    loads = loads_msgpack
-    dumps = dumps_msgpack
+    dumps = MsgpackSerializer.dumps
+    loads = MsgpackSerializer.loads
+
+
+class NonPicklableWrapper(object):
+    """
+    TODO
+    https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py
+    """
+    def __init__(self, obj):
+        self._obj = obj
+
+    def __reduce__(self):
+        import dill
+        s = dill.dumps(self._obj)
+        return dill.loads, (s, )
+
+    def __call__(self, *args, **kwargs):
+        return self._obj(*args, **kwargs)
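A usage sketch for the reorganized serializers (not part of the commit; it assumes the module path `tensorpack.utils.serialize` and that `dill` is installed for the wrapper). Since the backend is chosen at import time, `TENSORPACK_SERIALIZE` must be set before tensorpack is imported:

```python
import os
os.environ['TENSORPACK_SERIALIZE'] = 'pickle'  # or 'msgpack' (default) / 'pyarrow'

import pickle
import numpy as np
from tensorpack.utils.serialize import loads, dumps, NonPicklableWrapper

obj = {'step': 3, 'img': np.zeros((2, 2), dtype='float32')}
buf = dumps(obj)                        # bytes; format depends on the backend
assert (loads(buf)['img'] == obj['img']).all()

# NonPicklableWrapper routes pickling through dill, so e.g. lambdas survive:
f = NonPicklableWrapper(lambda x: x + 1)
g = pickle.loads(pickle.dumps(f))       # __reduce__ returns (dill.loads, (s,))
assert g(1) == 2
```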
@@ -14,11 +14,14 @@ python -c "from tensorflow.python.training.monitored_session import _HookedSessi
 python -c "import tensorflow as tf; tf.Operation._add_control_input"
 
 # run tests
-python -m tensorpack.callbacks.param_test
-python -m tensorpack.tfutils.unit_tests
-python -m unittest tensorpack.dataflow.imgaug._test
+python -m unittest tensorpack.callbacks.param_test
+python -m unittest tensorpack.tfutils.unit_tests
+python -m unittest tensorpack.dataflow.imgaug.imgaug_test
+python -m unittest tensorpack.models.models_test
 
 # use pyarrow after we organize the serializers.
-# TENSORPACK_SERIALIZE=pyarrow python test_serializer.py
-TENSORPACK_SERIALIZE=msgpack python test_serializer.py
+# TENSORPACK_SERIALIZE=pyarrow python ...
+python -m unittest tensorpack.dataflow.serialize_test
 
+# e2e tests
 python -m unittest discover -v