Commit a2b26f2b authored by Yuxin Wu's avatar Yuxin Wu

Still use msgpack by default. Speed varies between situations; this needs more investigation.

parent d8d3ed1a
......@@ -8,8 +8,6 @@ so you won't need to look at here very often.
Here is a list of things that were changed, starting from an early version.
TensorFlow itself also changed APIs before 1.0 and those are not listed here.
+ [2018/01/09] Switched to pyarrow for faster serialization/deserialization on buffer-like objects.
Old serialized data may not be compatible with future versions.
+ [2017/10/21]
tensorpack is gradually switching to a new Trainer API.
The old API will keep working for a while. See [issue](https://github.com/ppwwyyxx/tensorpack/issues/458)
......
......@@ -30,7 +30,7 @@ MOCK_MODULES = ['tabulate', 'h5py',
'scipy', 'scipy.misc', 'scipy.io',
'tornado', 'tornado.concurrent',
'horovod', 'horovod.tensorflow',
'pyarrow',
'pyarrow', 'msgpack', 'msgpack_numpy',
'functools32']
for mod_name in MOCK_MODULES:
sys.modules[mod_name] = mock.Mock(name=mod_name)
......
......@@ -3,7 +3,8 @@ six
termcolor>=1.1
tabulate>=0.7.7
tqdm>4.11.1
pyarrow
msgpack
msgpack-numpy>=0.4.0
pyzmq>=16
subprocess32; python_version < '3.0'
functools32; python_version < '3.0'
......@@ -3,7 +3,14 @@
# File: serialize.py
# Author: Yuxin Wu <ppwwyyxxc@gmail.com>
import pyarrow as pa
import msgpack
import msgpack_numpy
msgpack_numpy.patch()
try:
import pyarrow as pa
except ImportError:
pass
__all__ = ['loads', 'dumps']
......@@ -12,6 +19,23 @@ __all__ = ['loads', 'dumps']
def dumps(obj):
    """
    Serialize an object with msgpack.

    Args:
        obj: the object to serialize.

    Returns:
        Implementation-dependent bytes-like object
    """
    # use_bin_type=True makes msgpack emit its "bin" type for bytes, so that
    # bytes and text stay distinguishable in the packed output.
    packed = msgpack.dumps(obj, use_bin_type=True)
    return packed
def loads(buf):
    """
    Deserialize a buffer produced by `dumps`.

    Args:
        buf: the output of `dumps`.

    Returns:
        The deserialized object.
    """
    # `raw=False` decodes msgpack strings as UTF-8 text, which is what
    # `encoding='utf-8'` used to do. The `encoding` keyword is deprecated and
    # was removed in msgpack 1.0, where passing it raises TypeError.
    # NOTE: `raw` requires msgpack >= 0.5.2.
    return msgpack.loads(buf, raw=False)
def dumps_pyarrow(obj):
"""
Serialize an object.
Returns:
Implementation-dependent bytes-like object
......@@ -19,15 +43,9 @@ def dumps(obj):
return pa.serialize(obj).to_buffer()
def loads(buf):
def loads_pyarrow(buf):
"""
Args:
buf: the output of `dumps`.
"""
try:
return pa.deserialize(buf)
except pa.ArrowIOError:
# Handle data serialized by old version of tensorpack.
import msgpack
import msgpack_numpy as mn
return msgpack.unpackb(buf, object_hook=mn.decode, encoding='utf-8')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment