Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
seminar-breakout
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shashank Suhas
seminar-breakout
Commits
cddb713f
Commit
cddb713f
authored
Mar 29, 2017
by
Yuxin Wu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
make send_data and RemoteData useful (#202)
parent
3397e0bd
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
70 additions
and
31 deletions
+70
-31
tensorpack/dataflow/common.py
tensorpack/dataflow/common.py
+6
-0
tensorpack/dataflow/remote.py
tensorpack/dataflow/remote.py
+64
-31
No files found.
tensorpack/dataflow/common.py
View file @
cddb713f
...
...
@@ -47,6 +47,12 @@ class TestDataSpeed(ProxyDataFlow):
if
idx
==
self
.
test_size
-
1
:
break
def start(self):
    """Convenience alias: delegates straight to ``start_test``."""
    self.start_test()
class
BatchData
(
ProxyDataFlow
):
"""
...
...
tensorpack/dataflow/remote.py
View file @
cddb713f
...
...
@@ -10,34 +10,33 @@ except ImportError:
logger
.
warn
(
"Error in 'import zmq'. remote feature won't be available"
)
__all__
=
[]
else
:
__all__
=
[
'se
rve_data'
,
'RemoteData
'
]
__all__
=
[
'se
nd_dataflow_zmq'
,
'RemoteDataZMQ
'
]
from
.base
import
DataFlow
from
.common
import
RepeatedData
from
..utils
import
logger
from
..utils.serialize
import
dumps
,
loads
def send_dataflow_zmq(df, addr, hwm=50):
    """
    Run a DataFlow forever, serializing every datapoint and pushing it
    through a ZMQ PUSH socket connected to ``addr``.

    Args:
        df (DataFlow): Will infinitely loop over the DataFlow.
        addr: a ZMQ socket addr.
        hwm (int): high water mark
    """
    ctx = zmq.Context()
    socket = ctx.socket(zmq.PUSH)
    socket.set_hwm(hwm)
    socket.connect(addr)
    try:
        df.reset_state()
        logger.info("Serving data to {}".format(addr))
        # TODO print statistics such as speed
        while True:
            for dp in df.get_data():
                socket.send(dumps(dp), copy=False)
    finally:
        socket.setsockopt(zmq.LINGER, 0)
        # NOTE(review): the diff view collapses a couple of lines here —
        # the original may also close the socket before destroying the
        # context; verify against the full file.
        ctx.destroy(0)
class
RemoteData
(
DataFlow
):
""" Produce data from
a ZMQ socket
. """
def
__init__
(
self
,
addr
):
class RemoteDataZMQ(DataFlow):
    """ Produce data from ZMQ PULL socket(s). """
    def __init__(self, addr1, addr2=None):
        """
        Args:
            addr1,addr2 (str): addr of the socket to connect to.
                Use both if you need two protocols (e.g. both IPC and TCP).
                I don't think you'll ever need 3.
        """
        assert addr1
        self._addr1 = addr1
        self._addr2 = addr2

    def get_data(self):
        try:
            ctx = zmq.Context()
            if self._addr2 is None:
                # Single-endpoint path: one PULL socket bound to addr1.
                pull = ctx.socket(zmq.PULL)
                pull.set_hwm(50)
                pull.bind(self._addr1)
                while True:
                    yield loads(pull.recv(copy=False).bytes)
            else:
                # Two endpoints: bind one PULL socket per addr and poll both.
                poller = zmq.Poller()
                for endpoint in (self._addr1, self._addr2):
                    sock = ctx.socket(zmq.PULL)
                    sock.set_hwm(50)
                    sock.bind(endpoint)
                    poller.register(sock, zmq.POLLIN)
                while True:
                    for ready_sock, _ in poller.poll():
                        yield loads(ready_sock.recv(copy=False).bytes)
        finally:
            ctx.destroy(linger=0)
if __name__ == '__main__':
    from argparse import ArgumentParser
    from .raw import FakeData
    from .common import TestDataSpeed
    """
    Test the multi-producer single-consumer model
    """
    parser = ArgumentParser()
    parser.add_argument('-t', '--task', choices=['send', 'recv'], required=True)
    parser.add_argument('-a', '--addr1', required=True)
    parser.add_argument('-b', '--addr2', default=None)
    args = parser.parse_args()

    # tcp addr like "tcp://127.0.0.1:8877"
    # ipc addr like "ipc:///tmp/ipc-test"
    if args.task == 'send':
        # use random=True to make it slow and cpu-consuming
        ds = FakeData([(128, 244, 244, 3)], 1000, random=True)
        send_dataflow_zmq(ds, args.addr1)
    else:
        ds = RemoteDataZMQ(args.addr1, args.addr2)
        logger.info("Each DP is 73.5MB")
        TestDataSpeed(ds).start_test()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment