Commit d8d3ed1a authored by Yuxin Wu

replace msgpack by pyarrow

parent a3272aad
...@@ -8,6 +8,8 @@ so you won't need to look at here very often. ...@@ -8,6 +8,8 @@ so you won't need to look at here very often.
Here is a list of things that were changed, starting from an early version. Here is a list of things that were changed, starting from an early version.
TensorFlow itself also changed APIs before 1.0 and those are not listed here. TensorFlow itself also changed APIs before 1.0 and those are not listed here.
+ [2018/01/09] Switched to pyarrow for faster serialization/deserialization on buffer-like objects.
Old serialized data may not be compatible with future versions.
+ [2017/10/21] + [2017/10/21]
tensorpack is gradually switching to a new Trainer API. tensorpack is gradually switching to a new Trainer API.
The old API will keep working for a while. See [issue](https://github.com/ppwwyyxx/tensorpack/issues/458) The old API will keep working for a while. See [issue](https://github.com/ppwwyyxx/tensorpack/issues/458)
......
...@@ -30,7 +30,7 @@ MOCK_MODULES = ['tabulate', 'h5py', ...@@ -30,7 +30,7 @@ MOCK_MODULES = ['tabulate', 'h5py',
'scipy', 'scipy.misc', 'scipy.io', 'scipy', 'scipy.misc', 'scipy.io',
'tornado', 'tornado.concurrent', 'tornado', 'tornado.concurrent',
'horovod', 'horovod.tensorflow', 'horovod', 'horovod.tensorflow',
'msgpack', 'msgpack_numpy', 'pyarrow',
'functools32'] 'functools32']
for mod_name in MOCK_MODULES: for mod_name in MOCK_MODULES:
sys.modules[mod_name] = mock.Mock(name=mod_name) sys.modules[mod_name] = mock.Mock(name=mod_name)
......
...@@ -3,8 +3,7 @@ six ...@@ -3,8 +3,7 @@ six
termcolor>=1.1 termcolor>=1.1
tabulate>=0.7.7 tabulate>=0.7.7
tqdm>4.11.1 tqdm>4.11.1
msgpack pyarrow
msgpack-numpy>=0.3.9
pyzmq>=16 pyzmq>=16
subprocess32; python_version < '3.0' subprocess32; python_version < '3.0'
functools32; python_version < '3.0' functools32; python_version < '3.0'
...@@ -32,7 +32,7 @@ def send_dataflow_zmq(df, addr, hwm=50, format=None): ...@@ -32,7 +32,7 @@ def send_dataflow_zmq(df, addr, hwm=50, format=None):
addr: a ZMQ socket endpoint. addr: a ZMQ socket endpoint.
hwm (int): ZMQ high-water mark (buffer size) hwm (int): ZMQ high-water mark (buffer size)
format (str): The serialization format. format (str): The serialization format.
Default format would use :mod:`tensorpack.utils.serialize` (i.e. msgpack). Default format would use :mod:`tensorpack.utils.serialize`.
An alternate format is 'zmq_op', used by https://github.com/tensorpack/zmq_ops. An alternate format is 'zmq_op', used by https://github.com/tensorpack/zmq_ops.
""" """
assert format in [None, 'zmq_op'] assert format in [None, 'zmq_op']
......
...@@ -3,10 +3,7 @@ ...@@ -3,10 +3,7 @@
# File: serialize.py # File: serialize.py
# Author: Yuxin Wu <ppwwyyxxc@gmail.com> # Author: Yuxin Wu <ppwwyyxxc@gmail.com>
import msgpack import pyarrow as pa
import msgpack_numpy
msgpack_numpy.patch()
__all__ = ['loads', 'dumps'] __all__ = ['loads', 'dumps']
...@@ -17,14 +14,20 @@ def dumps(obj): ...@@ -17,14 +14,20 @@ def dumps(obj):
Serialize an object. Serialize an object.
Returns: Returns:
str Implementation-dependent bytes-like object
""" """
return msgpack.dumps(obj, use_bin_type=True) return pa.serialize(obj).to_buffer()
def loads(buf): def loads(buf):
""" """
Args: Args:
buf (str): serialized object. buf: the output of `dumps`.
""" """
return msgpack.loads(buf, encoding='utf-8') try:
return pa.deserialize(buf)
except pa.ArrowIOError:
# Handle data serialized by old version of tensorpack.
import msgpack
import msgpack_numpy as mn
return msgpack.unpackb(buf, object_hook=mn.decode, encoding='utf-8')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment