Commit adc9409f authored by Yuxin Wu's avatar Yuxin Wu

update serializer; update tests

parent 78762b38
......@@ -52,6 +52,14 @@ Note that the above methods only prevent variables being updated by SGD.
Some variables may be updated by other means,
e.g., BatchNorm statistics are updated through the `UPDATE_OPS` collection and the [RunUpdateOps](../modules/callbacks.html#tensorpack.callbacks.RunUpdateOps) callback.
## The model does not run on CPUs?
Some TensorFlow ops are not implemented on CPUs.
For example, TensorFlow does not support many ops in NCHW format on CPUs.
Note that an MKL-enabled build of TensorFlow supports more NCHW ops.
In general, you need to implement the model in a way your version of TensorFlow supports.
## My training seems slow. Why?
Check out the [Performance Tuning tutorial](performance-tuning.html).
......@@ -66,7 +66,7 @@ setup(
],
tests_require=['flake8', 'scikit-image'],
extras_require={
'all': ['pillow', 'scipy', 'h5py', 'lmdb>=0.92', 'matplotlib', 'scikit-learn'],
'all': ['scipy', 'h5py', 'lmdb>=0.92', 'matplotlib', 'scikit-learn'],
'all: "linux" in sys_platform': ['python-prctl'],
'all: python_version < "3.0"': ['tornado'],
},
......
......@@ -43,7 +43,8 @@ for _, module_name, __ in iter_modules(
srcpath = os.path.join(_CURR_DIR, module_name + '.py')
if not os.path.isfile(srcpath):
continue
if not module_name.startswith('_') and \
if "_test" not in module_name and \
not module_name.startswith('_') and \
module_name not in __SKIP:
_global_import(module_name)
......
......@@ -48,5 +48,5 @@ else:
srcpath = os.path.join(_CURR_DIR, module_name + '.py')
if not os.path.isfile(srcpath):
continue
if not module_name.startswith('_'):
if not module_name.startswith('_') and "_test" not in module_name:
global_import(module_name)
......@@ -44,5 +44,7 @@ for _, module_name, _ in iter_modules(
continue
if module_name.startswith('_'):
continue
if "_test" in module_name:
continue
if module_name not in _SKIP:
_global_import(module_name)
......@@ -5,6 +5,9 @@
import logging
import unittest
import tensorflow as tf
import numpy as np
from .pool import FixedUnPooling
class TestModel(unittest.TestCase):
......@@ -24,6 +27,47 @@ class TestModel(unittest.TestCase):
return tf.Variable(args[0])
class TestPool(TestModel):
    """Tests for the pooling layers."""

    def test_FixedUnPooling(self):
        """Check the output shape and value placement of `FixedUnPooling`.

        A random (3, 4, 3) float32 array is reshaped to a batch of one and
        unpooled by a factor of 2.
        """
        scale = 2
        h, w = 3, 4
        mat = np.random.rand(h, w, 3).astype('float32')

        inp = tf.reshape(self.make_variable(mat), [1, h, w, 3])
        res = self.run_variable(FixedUnPooling('unpool', inp, scale))

        # both spatial dimensions grow by `scale`
        self.assertEqual(res.shape, (1, scale * h, scale * w, 3))

        # sampling every `scale`-th row and column gives back the input
        # (only channel 0 is compared)
        corner = res[0, ::scale, ::scale, 0]
        self.assertTrue((corner == mat[:, :, 0]).all())

        # after clearing those positions in place, only zeros may remain
        res[0, ::scale, ::scale, :] = 0
        self.assertTrue((res == 0).all())
# Below was originally for the BilinearUpsample layer used in the HED example
# def test_BilinearUpSample(self):
# h, w = 12, 12
# scale = 2
#
# mat = np.random.rand(h, w).astype('float32')
# inp = self.make_variable(mat)
# inp = tf.reshape(inp, [1, h, w, 1])
#
# output = BilinearUpSample(inp, scale)
# res = self.run_variable(output)[0, :, :, 0]
#
# from skimage.transform import rescale
# res2 = rescale(mat, scale, mode='edge')
#
# diff = np.abs(res2 - res)
#
# # if not diff.max() < 1e-4:
# # import IPython
# # IPython.embed(config=IPython.terminal.ipapp.load_default_config())
# self.assertTrue(diff.max() < 1e-4, diff.max())
def run_test_case(case):
    """Run all tests of one test case class with verbose text output.

    Args:
        case: a `unittest.TestCase` subclass whose tests should be run.
    """
    loader = unittest.TestLoader()
    tests = loader.loadTestsFromTestCase(case)
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(tests)
......
......@@ -5,7 +5,6 @@ import numpy as np
from ..compat import tfv1 as tf # this should be avoided first in model code
from ..utils.argtools import get_data_format, shape2d
from ._test import TestModel
from .common import layer_register
from .shape_utils import StaticDynamicShape
from .tflayer import convert_to_tflayer_args
......@@ -137,44 +136,3 @@ def FixedUnPooling(x, shape, unpool_mat=None, data_format='channels_last'):
ret.set_shape(tf.TensorShape(output_shape.get_static()))
return ret
class TestPool(TestModel):
    """Test case covering `FixedUnPooling`."""

    def test_FixedUnPooling(self):
        """Unpool a random (1, 3, 4, 3) input by 2 and verify the result."""
        h, w = 3, 4
        scale = 2
        expected_shape = (1, scale * h, scale * w, 3)

        mat = np.random.rand(h, w, 3).astype('float32')
        var = self.make_variable(mat)
        batched = tf.reshape(var, [1, h, w, 3])
        unpooled = FixedUnPooling('unpool', batched, scale)
        res = self.run_variable(unpooled)
        self.assertEqual(res.shape, expected_shape)

        # the input sits on every `scale`-th row/column (checked on channel 0)
        strided = res[0, ::scale, ::scale, 0]
        matches = (strided == mat[:, :, 0])
        self.assertTrue(matches.all())

        # zero those positions in place; everything left must already be zero
        res[0, ::scale, ::scale, :] = 0
        all_zero = (res == 0)
        self.assertTrue(all_zero.all())
# Below was originally for the BilinearUpsample layer used in the HED example
# def test_BilinearUpSample(self):
# h, w = 12, 12
# scale = 2
#
# mat = np.random.rand(h, w).astype('float32')
# inp = self.make_variable(mat)
# inp = tf.reshape(inp, [1, h, w, 1])
#
# output = BilinearUpSample(inp, scale)
# res = self.run_variable(output)[0, :, :, 0]
#
# from skimage.transform import rescale
# res2 = rescale(mat, scale, mode='edge')
#
# diff = np.abs(res2 - res)
#
# # if not diff.max() < 1e-4:
# # import IPython
# # IPython.embed(config=IPython.terminal.ipapp.load_default_config())
# self.assertTrue(diff.max() < 1e-4, diff.max())
......@@ -2,14 +2,11 @@
# File: serialize.py
import os
import sys
import pickle
import msgpack
import msgpack_numpy
from . import logger
from .develop import create_dummy_func
msgpack_numpy.patch()
assert msgpack.version >= (0, 5, 2)
......@@ -19,7 +16,10 @@ __all__ = ['loads', 'dumps']
MAX_MSGPACK_LEN = 1000000000
def dumps_msgpack(obj):
class MsgpackSerializer(object):
@staticmethod
def dumps(obj):
"""
Serialize an object.
......@@ -28,8 +28,8 @@ def dumps_msgpack(obj):
"""
return msgpack.dumps(obj, use_bin_type=True)
def loads_msgpack(buf):
@staticmethod
def loads(buf):
"""
Args:
buf: the output of `dumps`.
......@@ -43,7 +43,9 @@ def loads_msgpack(buf):
max_str_len=MAX_MSGPACK_LEN)
def dumps_pyarrow(obj):
class PyarrowSerializer(object):
@staticmethod
def dumps(obj):
"""
Serialize an object.
......@@ -51,51 +53,53 @@ def dumps_pyarrow(obj):
Implementation-dependent bytes-like object.
May not be compatible across different versions of pyarrow.
"""
import pyarrow as pa
return pa.serialize(obj).to_buffer()
@staticmethod
def dumps_bytes(obj):
"""
Returns:
bytes
"""
return PyarrowSerializer.dumps(obj).to_pybytes()
def loads_pyarrow(buf):
@staticmethod
def loads(buf):
"""
Args:
buf: the output of `dumps`.
buf: the output of `dumps` or `dumps_bytes`.
"""
return pa.deserialize(buf)
# import pyarrow has a lot of side effect:
# https://github.com/apache/arrow/pull/2329
# https://groups.google.com/a/tensorflow.org/forum/#!topic/developers/TMqRaT-H2bI
# So we use msgpack as default.
if os.environ.get('TENSORPACK_SERIALIZE', 'msgpack') == 'pyarrow':
try:
import pyarrow as pa
except ImportError:
loads_pyarrow = create_dummy_func('loads_pyarrow', ['pyarrow']) # noqa
dumps_pyarrow = create_dummy_func('dumps_pyarrow', ['pyarrow']) # noqa
if 'horovod' in sys.modules:
logger.warn("Horovod and pyarrow may have symbol conflicts. "
"Uninstall pyarrow and use msgpack instead.")
loads = loads_pyarrow
dumps = dumps_pyarrow
else:
loads = loads_msgpack
dumps = dumps_msgpack
return pa.deserialize(buf)
class NonPicklableWrapper(object):
class PickleSerializer(object):
@staticmethod
def dumps(obj):
"""
Returns:
bytes
"""
TODO
return pickle.dumps(obj, protocol=-1)
https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py
@staticmethod
def loads(buf):
"""
def __init__(self, obj):
self._obj = obj
Args:
bytes
"""
return pickle.loads(buf)
def __reduce__(self):
import dill
s = dill.dumps(self._obj)
return dill.loads, (s, )
def __call__(self, *args, **kwargs):
return self._obj(*args, **kwargs)
# Choose the module-level `dumps` / `loads` pair from the TENSORPACK_SERIALIZE
# environment variable. Any value other than "pyarrow" or "pickle"
# (including the default "msgpack") selects the msgpack serializer.
_DEFAULT_S = os.environ.get('TENSORPACK_SERIALIZE', 'msgpack')
dumps, loads = {
    "pyarrow": (PyarrowSerializer.dumps_bytes, PyarrowSerializer.loads),
    "pickle": (PickleSerializer.dumps, PickleSerializer.loads),
}.get(_DEFAULT_S, (MsgpackSerializer.dumps, MsgpackSerializer.loads))
......@@ -14,11 +14,14 @@ python -c "from tensorflow.python.training.monitored_session import _HookedSessi
python -c "import tensorflow as tf; tf.Operation._add_control_input"
# run tests
python -m tensorpack.callbacks.param_test
python -m tensorpack.tfutils.unit_tests
python -m unittest tensorpack.dataflow.imgaug._test
python -m unittest tensorpack.callbacks.param_test
python -m unittest tensorpack.tfutils.unit_tests
python -m unittest tensorpack.dataflow.imgaug.imgaug_test
python -m unittest tensorpack.models.models_test
# use pyarrow after we organize the serializers.
# TENSORPACK_SERIALIZE=pyarrow python test_serializer.py
TENSORPACK_SERIALIZE=msgpack python test_serializer.py
# TENSORPACK_SERIALIZE=pyarrow python ...
python -m unittest tensorpack.dataflow.serialize_test
# e2e tests
python -m unittest discover -v
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment