Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
seminar-breakout
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shashank Suhas
seminar-breakout
Commits
feaae168
Commit
feaae168
authored
Feb 10, 2017
by
Yuxin Wu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
logging for concurrency utilities
parent
c68686e6
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
37 additions
and
12 deletions
+37
-12
docs/tutorial/efficient-data.md
docs/tutorial/efficient-data.md
+12
-0
examples/A3C-Gym/README.md
examples/A3C-Gym/README.md
+1
-1
tensorpack/callbacks/concurrency.py
tensorpack/callbacks/concurrency.py
+6
-2
tensorpack/dataflow/common.py
tensorpack/dataflow/common.py
+7
-7
tensorpack/dataflow/dataset/ilsvrc.py
tensorpack/dataflow/dataset/ilsvrc.py
+1
-1
tensorpack/predict/concurrency.py
tensorpack/predict/concurrency.py
+9
-1
tensorpack/train/multigpu.py
tensorpack/train/multigpu.py
+1
-0
No files found.
docs/tutorial/efficient-data.md
View file @
feaae168
...
...
@@ -66,6 +66,18 @@ dataset instead of a down-sampled version here.
The average resolution is about 400x350
<sup>
[[1]]
</sup>
.
The original images (JPEG compressed) are 140G in total.
We start from a simple DataFlow:
```
python
from
tensorpack
import
*
ds
=
dataset
.
ILSVRC12
(
'/path/to/ILSVRC12'
,
'train'
,
shuffle
=
True
)
ds
=
BatchData
(
ds
,
256
,
use_list
=
True
)
TestDataSpeed
(
ds
)
.
start_test
()
```
Here the first
`ds`
simply reads the original images from the filesystem, and the second
`ds`
batches them, so
that we can test the speed of this DataFlow in units of batches per second. By default
`BatchData`
will concatenate the data into an ndarray, but since images are originally of different shapes, we use
`use_list=True`
so that it just produces lists.
[
1
]:
#ref
...
...
examples/A3C-Gym/README.md
View file @
feaae168
...
...
@@ -16,7 +16,7 @@ probably because of async issues.
The pre-trained models are all trained with 4 GPUs for about 2 days.
But note that multi-GPU doesn't give you obvious speedup here,
because the bottleneck is not computation but data. On machines without huge memory, you may also need to
because the bottleneck in this implementation is not computation but data. On machines without huge memory, you may also need to
enable tcmalloc to keep training throughput more stable.
Occasionally, processes may not get terminated completely, therefore it is suggested to use
`systemd-run`
to run any
...
...
tensorpack/callbacks/concurrency.py
View file @
feaae168
...
...
@@ -44,8 +44,12 @@ class StartProcOrThread(Callback):
if
isinstance
(
k
,
mp
.
Process
):
logger
.
info
(
"Stopping {} ..."
.
format
(
k
.
name
))
k
.
terminate
()
k
.
join
()
k
.
join
(
5.0
)
if
k
.
is_alive
():
logger
.
error
(
"Cannot join process {}."
.
format
(
k
.
name
))
elif
isinstance
(
k
,
StoppableThread
):
logger
.
info
(
"Stopping {} ..."
.
format
(
k
.
name
))
k
.
stop
()
k
.
join
()
k
.
join
(
5.0
)
if
k
.
is_alive
():
logger
.
error
(
"Cannot join thread {}."
.
format
(
k
.
name
))
tensorpack/dataflow/common.py
View file @
feaae168
...
...
@@ -56,7 +56,7 @@ class BatchData(ProxyDataFlow):
of the original datapoints.
"""
def
__init__
(
self
,
ds
,
batch_size
,
remainder
=
False
,
allow
_list
=
False
):
def
__init__
(
self
,
ds
,
batch_size
,
remainder
=
False
,
use
_list
=
False
):
"""
Args:
ds (DataFlow): Its components must be either scalars or :class:`np.ndarray`.
...
...
@@ -65,7 +65,7 @@ class BatchData(ProxyDataFlow):
remainder (bool): whether to return the remaining data smaller than a batch_size.
If set True, it will possibly generate a data point of a smaller batch size.
Otherwise, all generated data are guaranteed to have the same size.
allow
_list (bool): if True, it will run faster by producing a list
use
_list (bool): if True, it will run faster by producing a list
of datapoints instead of an ndarray of datapoints, avoiding an
extra copy.
"""
...
...
@@ -77,7 +77,7 @@ class BatchData(ProxyDataFlow):
pass
self
.
batch_size
=
batch_size
self
.
remainder
=
remainder
self
.
allow_list
=
allow
_list
self
.
use_list
=
use
_list
def
size
(
self
):
ds_size
=
self
.
ds
.
size
()
...
...
@@ -96,17 +96,17 @@ class BatchData(ProxyDataFlow):
for
data
in
self
.
ds
.
get_data
():
holder
.
append
(
data
)
if
len
(
holder
)
==
self
.
batch_size
:
yield
BatchData
.
_aggregate_batch
(
holder
,
self
.
allow
_list
)
yield
BatchData
.
_aggregate_batch
(
holder
,
self
.
use
_list
)
del
holder
[:]
if
self
.
remainder
and
len
(
holder
)
>
0
:
yield
BatchData
.
_aggregate_batch
(
holder
,
self
.
allow
_list
)
yield
BatchData
.
_aggregate_batch
(
holder
,
self
.
use
_list
)
@
staticmethod
def
_aggregate_batch
(
data_holder
,
allow
_list
):
def
_aggregate_batch
(
data_holder
,
use
_list
):
size
=
len
(
data_holder
[
0
])
result
=
[]
for
k
in
range
(
size
):
if
allow
_list
:
if
use
_list
:
result
.
append
(
[
x
[
k
]
for
x
in
data_holder
])
else
:
...
...
tensorpack/dataflow/dataset/ilsvrc.py
View file @
feaae168
...
...
@@ -98,7 +98,7 @@ class ILSVRCMeta(object):
class
ILSVRC12
(
RNGDataFlow
):
"""
Produces ILSVRC12 images of shape [h, w, 3(BGR)], and a label between [0, 999],
Produces
uint8
ILSVRC12 images of shape [h, w, 3(BGR)], and a label between [0, 999],
and optionally a bounding box of [xmin, ymin, xmax, ymax].
"""
def
__init__
(
self
,
dir
,
name
,
meta_dir
=
None
,
shuffle
=
None
,
...
...
tensorpack/predict/concurrency.py
View file @
feaae168
...
...
@@ -6,7 +6,9 @@
import
multiprocessing
import
six
from
six.moves
import
queue
,
range
import
tensorflow
as
tf
from
..utils
import
logger
from
..utils.concurrency
import
DIE
,
StoppableThread
from
..tfutils.modelutils
import
describe_model
from
.base
import
OfflinePredictor
,
AsyncPredictorBase
...
...
@@ -83,7 +85,13 @@ class PredictorWorkerThread(StoppableThread):
def
run
(
self
):
while
not
self
.
stopped
():
batched
,
futures
=
self
.
fetch_batch
()
outputs
=
self
.
func
(
batched
)
try
:
outputs
=
self
.
func
(
batched
)
except
tf
.
errors
.
CancelledError
:
for
f
in
futures
:
f
.
cancel
()
logger
.
warn
(
"PredictorWorkerThread id={}, call was cancelled."
.
format
(
self
.
id
))
return
# print "Worker {} batched {} Queue {}".format(
# self.id, len(futures), self.queue.qsize())
# debug, for speed testing
...
...
tensorpack/train/multigpu.py
View file @
feaae168
...
...
@@ -178,6 +178,7 @@ class AsyncMultiGPUTrainer(MultiGPUTrainer,
self
.
sess
.
run
([
op
])
next
(
self
.
async_step_counter
)
th
=
LoopThread
(
f
)
th
.
name
=
"AsyncLoopThread-{}"
.
format
(
k
)
th
.
pause
()
th
.
start
()
self
.
training_threads
.
append
(
th
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment