seminar-breakout · Commit 58c2779f
authored Jan 13, 2018 by Yuxin Wu

rename and docs

parent 2a4d248f
Showing 2 changed files with 17 additions and 9 deletions:

  tensorpack/dataflow/parallel.py      +11  -7
  tensorpack/dataflow/parallel_map.py   +6  -2
tensorpack/dataflow/parallel.py
@@ -21,8 +21,8 @@ from ..utils.serialize import loads, dumps
 from ..utils import logger
 from ..utils.gpu import change_gpu
 
-__all__ = ['PrefetchData', 'PrefetchDataZMQ', 'PrefetchOnGPUs',
-           'MultiThreadPrefetchData']
+__all__ = ['PrefetchData', 'MultiProcessPrefetchData',
+           'PrefetchDataZMQ', 'PrefetchOnGPUs', 'MultiThreadPrefetchData']
 
 
 def _repeat_iter(get_itr):
@@ -112,7 +112,7 @@ class _MultiProcessZMQDataFlow(DataFlow):
             pass
 
 
-class PrefetchData(ProxyDataFlow):
+class MultiProcessPrefetchData(ProxyDataFlow):
     """
     Prefetch data from a DataFlow using Python multiprocessing utilities.
     It will fork the process calling :meth:`__init__`, collect datapoints from `ds` in each
@@ -135,7 +135,7 @@ class PrefetchData(ProxyDataFlow):
     class _Worker(mp.Process):
         def __init__(self, ds, queue):
-            super(PrefetchData._Worker, self).__init__()
+            super(MultiProcessPrefetchData._Worker, self).__init__()
             self.ds = ds
             self.queue = queue
@@ -153,7 +153,7 @@ class PrefetchData(ProxyDataFlow):
             nr_prefetch (int): size of the queue to hold prefetched datapoints.
             nr_proc (int): number of processes to use.
         """
-        super(PrefetchData, self).__init__(ds)
+        super(MultiProcessPrefetchData, self).__init__(ds)
         try:
             self._size = ds.size()
         except NotImplementedError:
@@ -163,11 +163,11 @@ class PrefetchData(ProxyDataFlow):
         self._guard = DataFlowReentrantGuard()
 
         if nr_proc > 1:
-            logger.info("[PrefetchData] Will fork a dataflow more than one times. "
+            logger.info("[MultiProcessPrefetchData] Will fork a dataflow more than one times. "
                         "This assumes the datapoints are i.i.d.")
         self.queue = mp.Queue(self.nr_prefetch)
-        self.procs = [PrefetchData._Worker(self.ds, self.queue)
+        self.procs = [MultiProcessPrefetchData._Worker(self.ds, self.queue)
                       for _ in range(self.nr_proc)]
         ensure_proc_terminate(self.procs)
         start_proc_mask_signal(self.procs)
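Note: the renamed log message flags a real pitfall. Each forked worker owns an independent copy of `ds` and iterates it from the start, so with `nr_proc > 1` every datapoint is produced once per worker. That is harmless only when datapoints are i.i.d., e.g. randomly sampled or augmented. A minimal sketch of the effect, assuming the DataFlow API of this era (`reset_state()` then `get_data()`):

from tensorpack.dataflow import DataFromList
from tensorpack.dataflow.parallel import MultiProcessPrefetchData

# Five deterministic datapoints, forked into two worker processes.
ds = DataFromList([[i] for i in range(5)], shuffle=False)
ds = MultiProcessPrefetchData(ds, nr_prefetch=10, nr_proc=2)
ds.reset_state()

# Both workers replay the same five values, interleaved in
# nondeterministic order, so duplicates are guaranteed here.
seen = [dp[0] for dp, _ in zip(ds.get_data(), range(10))]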
@@ -185,6 +185,9 @@ class PrefetchData(ProxyDataFlow):
         pass
 
 
+PrefetchData = MultiProcessPrefetchData
+
+
 class PrefetchDataZMQ(_MultiProcessZMQDataFlow):
     """
     Prefetch data from a DataFlow using multiple processes, with ZeroMQ for
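Note: the added `PrefetchData = MultiProcessPrefetchData` alias is what makes the rename non-breaking. Both names are bound to the same class object, so existing imports, `isinstance` checks, and subclasses keep working unchanged:

from tensorpack.dataflow import parallel

# Both names resolve to the identical class object.
assert parallel.PrefetchData is parallel.MultiProcessPrefetchData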
@@ -329,6 +332,7 @@ class MultiThreadPrefetchData(DataFlow):
         def __init__(self, get_df, queue):
             super(MultiThreadPrefetchData._Worker, self).__init__()
             self.df = get_df()
+            assert isinstance(self.df, DataFlow), self.df
             self.queue = queue
             self.daemon = True
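Note: the new assertion makes `MultiThreadPrefetchData` fail fast when `get_df` is not a factory that returns a `DataFlow`, e.g. when a user passes a ready-made dataflow instance, or a factory that forgets its return statement and hands back None. A sketch of the intended usage, with `MyDataFlow` as a hypothetical user-defined dataflow and assuming the constructor signature `MultiThreadPrefetchData(get_df, nr_prefetch, nr_thread)`:

def get_df():
    # Each worker thread calls this factory to build its own,
    # independent copy of the dataflow.
    return MyDataFlow()  # hypothetical; must return a DataFlow

ds = MultiThreadPrefetchData(get_df, nr_prefetch=50, nr_thread=4)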
tensorpack/dataflow/parallel_map.py
@@ -294,6 +294,8 @@ MultiProcessMapData = MultiProcessMapDataZMQ  # alias
 def _pool_map(data):
     global SHARED_ARR, WORKER_ID, MAP_FUNC
     res = MAP_FUNC(data)
+    if res is None:
+        return None
     shared = np.reshape(SHARED_ARR, res.shape)
     assert shared.dtype == res.dtype
     shared[:] = res
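Note: before this change, a `map_func` that returned None crashed the worker at `res.shape`; the new guard instead forwards None to the driver as a drop-this-datapoint signal (the matching driver-side change is in the last hunk of this file). A hypothetical filtering `map_func` that exercises the guard:

import numpy as np

def map_func(img):
    # Returning None drops the datapoint entirely.
    if img.shape[0] < 32 or img.shape[1] < 32:
        return None
    # Otherwise return an array of fixed shape and dtype, as the
    # shared-memory transport requires.
    return np.float32(img[:32, :32])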
@@ -303,8 +305,8 @@ def _pool_map(data):
 class MultiProcessMapDataComponentSharedArray(DataFlow):
     """
     Similar to :class:`MapDataComponent`, but perform IPC by shared memory,
-    therefore more efficient. It requires `map_func` to always return
-    a numpy array of fixed shape and dtype, or None.
+    therefore more efficient when data (result of map_func) is large.
+    It requires `map_func` to always return a numpy array of fixed shape and dtype, or None.
     """
 
     def __init__(self, ds, nr_proc, map_func, output_shape, output_dtype, index=0):
         """
@@ -370,6 +372,8 @@ class MultiProcessMapDataComponentSharedArray(DataFlow):
         res = self._pool.map_async(_pool_map, to_map)
 
         for index in res.get():
+            if index is None:
+                continue
             arr = np.reshape(self._shared_mem[index], self.output_shape)
             dp = dps[index]
             dp[self.index] = arr.copy()
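Note: together with the worker-side guard in `_pool_map`, this `continue` completes the None protocol: when `map_func` returns None, nothing is written to shared memory and the driver skips the read-back, so the datapoint is dropped instead of crashing the pool.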