seminar-breakout (Shashank Suhas)

Commit 61663400, authored Nov 10, 2019 by Yuxin Wu
parent 23ab7001

    add serialization benchmark & forking pickler

Showing 3 changed files with 116 additions and 1 deletion:

    examples/FasterRCNN/dataset/dataset.py    +1   -1
    tensorpack/utils/serialize.py             +17  -0
    tests/benchmark-serializer.py             +98  -0
examples/FasterRCNN/dataset/dataset.py

@@ -24,7 +24,7 @@ class DatasetSplit():
             boxes: numpy array of kx4 floats, each row is [x1, y1, x2, y2]
             class: numpy array of k integers, in the range of [1, #categories], NOT [0, #categories)
             is_crowd: k booleans. Use k False if you don't know what it means.
-            segmentation: k lists of numpy arrays (one for each instance).
+            segmentation: k lists of numpy arrays.
                 Each list of numpy arrays corresponds to the mask for one instance.
                 Each numpy array in the list is a polygon of shape Nx2,
                 because one mask can be represented by N polygons.
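
To make the documented layout concrete, here is a minimal, hypothetical example of a `segmentation` value for k = 2 instances (the coordinates are invented for illustration):

import numpy as np

# k = 2 instances; each instance is a list of Nx2 float polygons.
segmentation = [
    # instance 1: a single triangular polygon (N = 3)
    [np.array([[10., 10.], [50., 10.], [30., 40.]])],
    # instance 2: a mask made of two polygons (e.g. an occluded object)
    [np.array([[60., 60.], [90., 60.], [90., 90.], [60., 90.]]),
     np.array([[100., 100.], [120., 100.], [110., 115.]])],
]
assert all(poly.shape[1] == 2 for instance in segmentation for poly in instance)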
tensorpack/utils/serialize.py

@@ -4,6 +4,7 @@
 import os
 import pickle
+from multiprocessing.reduction import ForkingPickler

 import msgpack
 import msgpack_numpy
@@ -92,6 +93,7 @@ class PickleSerializer(object):
         return pickle.loads(buf)


+# Define the default serializer to be used that dumps data to bytes
 _DEFAULT_S = os.environ.get('TENSORPACK_SERIALIZE', 'msgpack')

 if _DEFAULT_S == "pyarrow":
@@ -103,3 +105,18 @@ elif _DEFAULT_S == "pickle":
 else:
     dumps = MsgpackSerializer.dumps
     loads = MsgpackSerializer.loads
+
+# Define the default serializer to be used for passing data
+# among a pair of peers. In this case the deserialization is
+# known to happen only once
+_DEFAULT_S = os.environ.get('TENSORPACK_ONCE_SERIALIZE', 'pickle')
+
+if _DEFAULT_S == "pyarrow":
+    dumps_once = PyarrowSerializer.dumps
+    loads_once = PyarrowSerializer.loads
+elif _DEFAULT_S == "pickle":
+    dumps_once = ForkingPickler.dumps
+    loads_once = ForkingPickler.loads
+else:
+    dumps_once = MsgpackSerializer.dumps
+    loads_once = MsgpackSerializer.loads
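
The `ForkingPickler` route for the once-only path is the interesting part of this diff: `multiprocessing.reduction.ForkingPickler.dumps` returns a `memoryview` over its internal `BytesIO` buffer instead of a fresh `bytes` object, so when the result is consumed exactly once there is one less copy than with plain `pickle.dumps`. A minimal sketch using only the standard library (the payload is invented for illustration):

import pickle
from multiprocessing.reduction import ForkingPickler

data = {"payload": list(range(10000))}        # stand-in for real training data

buf = ForkingPickler.dumps(data)              # memoryview, no extra bytes copy
assert isinstance(buf, memoryview)
assert ForkingPickler.loads(buf) == data      # ForkingPickler.loads is pickle.loads

assert isinstance(pickle.dumps(data), bytes)  # plain pickle always materializes bytes

As with the existing `dumps`/`loads` pair, the selection can be overridden at runtime; for example, `TENSORPACK_ONCE_SERIALIZE=msgpack` would route `dumps_once`/`loads_once` through `MsgpackSerializer` instead.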
tests/benchmark-serializer.py (new file, mode 100644)

#!/usr/bin/env python3

import numpy as np
import argparse
import pyarrow as pa
from tabulate import tabulate
import operator

from tensorpack.utils import logger
from tensorpack.utils.serialize import (
    MsgpackSerializer,
    PyarrowSerializer,
    PickleSerializer,
    ForkingPickler,
)
from tensorpack.utils.timer import Timer
def benchmark_serializer(dumps, loads, data, num):
    buf = dumps(data)

    enc_timer = Timer()
    dec_timer = Timer()
    enc_timer.pause()
    dec_timer.pause()

    for k in range(num):
        enc_timer.resume()
        buf = dumps(data)
        enc_timer.pause()

        dec_timer.resume()
        loads(buf)
        dec_timer.pause()

    dumps_time = enc_timer.seconds() / num
    loads_time = dec_timer.seconds() / num
    return dumps_time, loads_time
def display_results(name, results):
    logger.info("Encoding benchmark for {}:".format(name))
    data = sorted([(x, y[0]) for x, y in results], key=operator.itemgetter(1))
    print(tabulate(data, floatfmt='.5f'))

    logger.info("Decoding benchmark for {}:".format(name))
    data = sorted([(x, y[1]) for x, y in results], key=operator.itemgetter(1))
    print(tabulate(data, floatfmt='.5f'))
def benchmark_all(name, serializers, data, num=30):
    logger.info("Benchmarking {} ...".format(name))
    results = []
    for serializer_name, dumps, loads in serializers:
        results.append((serializer_name, benchmark_serializer(dumps, loads, data, num=num)))
    display_results(name, results)
def fake_json_data():
    return {
        'words': """
    Lorem ipsum dolor sit amet, consectetur adipiscing
    elit. Mauris adipiscing adipiscing placerat.
    Vestibulum augue augue,
    pellentesque quis sollicitudin id, adipiscing.
    """ * 100,
        'list': list(range(100)) * 500,
        'dict': dict((str(i), 'a') for i in range(50000)),
        'dict2': dict((i, 'a') for i in range(50000)),
        'int': 3000,
        'float': 100.123456
    }
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("task")
    args = parser.parse_args()

    serializers = [
        ("msgpack", MsgpackSerializer.dumps, MsgpackSerializer.loads),
        ("pyarrow-buf", PyarrowSerializer.dumps, PyarrowSerializer.loads),
        ("pyarrow-bytes", PyarrowSerializer.dumps_bytes, PyarrowSerializer.loads),
        ("pickle", PickleSerializer.dumps, PickleSerializer.loads),
        ("forking-pickle", ForkingPickler.dumps, ForkingPickler.loads),
    ]

    if args.task == "numpy":
        numpy_data = [np.random.rand(64, 224, 224, 3).astype("float32"),
                      np.random.rand(64).astype('int32')]
        benchmark_all("numpy data", serializers, numpy_data)
    elif args.task == "json":
        benchmark_all("json data", serializers, fake_json_data(), num=50)
    elif args.task == "torch":
        import torch
        from pyarrow.lib import _default_serialization_context

        pa.register_torch_serialization_handlers(_default_serialization_context)
        torch_data = [torch.rand(64, 224, 224, 3),
                      torch.rand(64).to(dtype=torch.int32)]
        benchmark_all("torch data", serializers[1:], torch_data)
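
For reference, assuming tensorpack and its optional dependencies (msgpack, msgpack-numpy, pyarrow, tabulate; plus PyTorch for the last task) are installed, the script is driven by the single positional task argument:

$ python3 tests/benchmark-serializer.py numpy
$ python3 tests/benchmark-serializer.py json
$ python3 tests/benchmark-serializer.py torch

Note that the torch task runs only serializers[1:], skipping msgpack, presumably because no msgpack handler is registered for torch tensors.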