Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
seminar-breakout
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shashank Suhas
seminar-breakout
Commits
ba9d1793
Commit
ba9d1793
authored
Aug 21, 2019
by
Yuxin Wu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
exception handling in ZMQ runner
parent
be51dd88
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
35 additions
and
2 deletions
+35
-2
tensorpack/dataflow/parallel.py
tensorpack/dataflow/parallel.py
+35
-2
No files found.
tensorpack/dataflow/parallel.py
View file @
ba9d1793
...
...
@@ -2,7 +2,9 @@
# File: parallel.py
import
atexit
import
pickle
import
errno
import
traceback
import
itertools
import
multiprocessing
as
mp
import
os
...
...
@@ -25,6 +27,25 @@ __all__ = ['PrefetchData', 'MultiProcessPrefetchData',
'PrefetchDataZMQ'
,
'MultiThreadPrefetchData'
]
# from https://github.com/pytorch/pytorch/blob/master/torch/utils/data/_utils/__init__.py
class _ExceptionWrapper:
    """Wraps an exception plus traceback to communicate across threads.

    Only the exception type and the already-formatted traceback text are
    stored, so an instance can be pickled and shipped as a bytes payload
    (see :meth:`pack` / :meth:`unpack`).
    """

    # Prefix put in front of the pickled wrapper; `unpack` uses it to tell a
    # packed exception apart from any other bytes payload.
    MAGIC = b"EXC_MAGIC"

    def __init__(self, exc_info):
        """
        Args:
            exc_info: a ``(type, value, traceback)`` tuple, e.g. the result
                of ``sys.exc_info()``.
        """
        # It is important that we don't store exc_info, see
        # NOTE [ Python Traceback Reference Cycle Problem ]
        self.exc_type = exc_info[0]
        self.exc_msg = "".join(traceback.format_exception(*exc_info))

    def pack(self):
        """Return this wrapper as bytes: ``MAGIC`` followed by its pickle."""
        return self.MAGIC + pickle.dumps(self)

    @staticmethod
    def unpack(dp):
        """Recover a wrapper produced by :meth:`pack`.

        Args:
            dp: any received object.

        Returns:
            The unpickled ``_ExceptionWrapper`` when ``dp`` is a bytes object
            starting with ``MAGIC``; otherwise ``None``.
        """
        magic = _ExceptionWrapper.MAGIC
        if isinstance(dp, bytes) and dp.startswith(magic):
            # NOTE(security): pickle.loads executes arbitrary code from the
            # payload; only feed this data coming from trusted peers.
            return pickle.loads(dp[len(magic):])
        return None
def
_repeat_iter
(
get_itr
):
while
True
:
for
x
in
get_itr
():
...
...
@@ -291,14 +312,21 @@ class MultiProcessRunnerZMQ(_MultiProcessZMQDataFlow):
def
run
(
self
):
enable_death_signal
(
_warn
=
self
.
idx
==
0
)
self
.
ds
.
reset_state
()
itr
=
_repeat_iter
(
lambda
:
self
.
ds
)
context
=
zmq
.
Context
()
socket
=
context
.
socket
(
zmq
.
PUSH
)
socket
.
set_hwm
(
self
.
hwm
)
socket
.
connect
(
self
.
conn_name
)
try
:
while
True
:
for
dp
in
self
.
ds
:
try
:
dp
=
next
(
itr
)
socket
.
send
(
dumps
(
dp
),
copy
=
False
)
except
Exception
:
dp
=
_ExceptionWrapper
(
sys
.
exc_info
())
.
pack
()
socket
.
send
(
dumps
(
dp
),
copy
=
False
)
raise
# sigint could still propagate here, e.g. when nested
except
KeyboardInterrupt
:
pass
...
...
@@ -332,7 +360,12 @@ class MultiProcessRunnerZMQ(_MultiProcessZMQDataFlow):
self
.
_size
=
-
1
def _recv(self):
    """Receive one object from ``self.socket``, re-raising worker errors.

    The received message is deserialized with ``loads``. If the result is a
    packed ``_ExceptionWrapper``, the failure is logged and raised here
    instead of being returned as data.

    Returns:
        The deserialized object received from the socket.

    Raises:
        An exception of the type recorded in the wrapper, with the recorded
        traceback text as its message. ``RuntimeError`` with the same text
        if that type cannot be constructed from a single message argument.
    """
    ret = loads(self.socket.recv(copy=False))
    exc = _ExceptionWrapper.unpack(ret)
    if exc is None:
        return ret

    logger.error("Exception '{}' in worker:".format(str(exc.exc_type)))
    try:
        error = exc.exc_type(exc.exc_msg)
    except TypeError:
        # Some exception classes need more than one constructor argument
        # (e.g. UnicodeDecodeError); keep the worker traceback visible
        # rather than surfacing an unrelated TypeError.
        error = RuntimeError(exc.exc_msg)
    raise error
def __len__(self):
    """Report the length of the wrapped ``self.ds`` via its ``__len__``."""
    wrapped = self.ds
    return wrapped.__len__()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment