Shashank Suhas / seminar-breakout · Commits

Commit e04d846a
authored Jul 17, 2016 by Yuxin Wu

predictorfactory

parent fefdcfb1

Showing 10 changed files with 66 additions and 58 deletions
examples/Atari2600/DQN.py            +1   -3
examples/DisturbLabel/README.md      +6   -4
examples/cifar-convnet.py            +0   -1
tensorpack/callbacks/group.py        +6   -5
tensorpack/callbacks/inference.py    +1   -1
tensorpack/models/batch_norm.py      +1   -0
tensorpack/train/base.py             +5   -5
tensorpack/train/multigpu.py         +1   -6
tensorpack/train/trainer.py          +42  -33
tensorpack/utils/naming.py           +3   -0
examples/Atari2600/DQN.py

@@ -203,7 +203,5 @@ if __name__ == '__main__':
     config = get_config()
     if args.load:
         config.session_init = SaverRestore(args.load)
-    SimpleTrainer(config).train()
-    # TODO test if queue trainer works
-    #QueueInputTrainer(config).train()
+    QueueInputTrainer(config).train()
examples/DisturbLabel/README.md

@@ -5,16 +5,18 @@ I ran into the paper [DisturbLabel: Regularizing CNN on the Loss Layer](https://
 which basically said that noisy data gives you better performance.
 As many, I didn't believe the method and the results.
 
-This is a simple mnist training script with DisturbLabel. It uses the architecture in the paper and
-hyperparameters in my original [mnist example](../mnist-convnet.py). The results surprised me:
+This is a simple mnist training script with DisturbLabel. It uses the simple architecture in the paper, and
+hyperparameters in my original [mnist example](../mnist-convnet.py).
+The results surprised me, clean labels give the worst accuracy:
 
 Experiements were repeated 15 times for p=0, 10 times for p=0.02 & 0.05, and 5 times for other values
 of p. All experiements run for 100 epochs, with lr decay, which are enough for them to converge.
 
-I suppose the disturb method works as a random noise to prevent SGD from getting stuck.
-However it didn't work for harder problems such as SVHN:
+I suppose the disturb method works as a random noise that could prevent SGD from getting stuck, if
+training data are too easy or too few.
+It didn't work for harder problems such as SVHN:
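For context, the DisturbLabel idea this README refers to amounts to replacing each training label, with probability p, by a class drawn uniformly at random. A minimal NumPy sketch of that idea follows; it is an illustration only, not code from this repository, and the function name and defaults are made up:

import numpy as np

def disturb_labels(labels, p, num_classes=10, rng=None):
    """Return a copy of `labels` where each entry is replaced, with
    probability p, by a class sampled uniformly from all classes."""
    rng = rng or np.random.RandomState()
    labels = np.asarray(labels).copy()
    flip = rng.uniform(size=labels.shape[0]) < p
    labels[flip] = rng.randint(0, num_classes, size=int(flip.sum()))
    return labels

# e.g. noisy = disturb_labels(batch_labels, p=0.05) applied to each training batch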
examples/cifar-convnet.py

@@ -156,4 +156,3 @@ if __name__ == '__main__':
     if args.gpu:
         config.nr_tower = len(args.gpu.split(','))
     QueueInputTrainer(config).train()
-    #SimpleTrainer(config).train()
tensorpack/callbacks/group.py

@@ -113,12 +113,13 @@ class Callbacks(Callback):
         self.test_callback_context = TestCallbackContext()
 
     def _setup_graph(self):
-        for cb in self.cbs:
-            if isinstance(cb.type, TrainCallbackType):
-                cb.setup_graph(self.trainer)
-            else:
-                with self.test_callback_context.create_context(self.trainer):
+        with tf.name_scope(None):
+            for cb in self.cbs:
+                if isinstance(cb.type, TrainCallbackType):
+                    cb.setup_graph(self.trainer)
+                else:
+                    with self.test_callback_context.create_context(self.trainer):
                         cb.setup_graph(self.trainer)
 
     def _before_train(self):
         for cb in self.cbs:
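The `tf.name_scope(None)` wrapper added above is the key change: callback graph setup may now be invoked from inside an arbitrary name scope, and in the TF 1.x graph API opening a scope with `None` resets to the root scope, so ops created by callbacks get stable, un-prefixed names. A small stand-alone sketch of that behaviour (assumes the TF 1.x API this codebase targets, not repository code):

import tensorflow as tf  # TF 1.x graph-mode API assumed

with tf.name_scope('some_tower'):
    a = tf.constant(1, name='a')       # op name: 'some_tower/a'
    with tf.name_scope(None):          # resets to the root name scope
        b = tf.constant(2, name='b')   # op name: 'b', not 'some_tower/b'

print(a.op.name, b.op.name)            # some_tower/a  b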
tensorpack/callbacks/inference.py

@@ -78,7 +78,7 @@ class InferenceRunner(Callback):
         for v in self.vcs:
             assert isinstance(v, Inferencer), str(v)
 
-    def _before_train(self):
+    def _setup_graph(self):
         self.input_vars = self.trainer.model.reuse_input_vars()
         self._find_output_tensors()
         input_names = [x.name for x in self.input_vars]
tensorpack/models/batch_norm.py

@@ -52,6 +52,7 @@ def BatchNorm(x, use_local_stat=True, decay=0.9, epsilon=1e-5):
         batch_mean = tf.identity(batch_mean, 'mean')
         batch_var = tf.identity(batch_var, 'variance')
 
+    # XXX hack....
     emaname = 'EMA'
     in_main_tower = not batch_mean.name.startswith('towerp')
     if in_main_tower:
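The hack flagged above relies on the naming convention this commit introduces: prediction towers are built under a name scope starting with 'towerp' (PredictorFactory.PREFIX in trainer.py below), so a tensor's name alone tells BatchNorm whether it lives in the main training tower. A tiny illustration of the check, not repository code:

def in_main_tower(tensor_name):
    # prediction-tower tensors look like 'towerp0/.../mean:0';
    # tensors built in the main training tower carry no such prefix
    return not tensor_name.startswith('towerp')

assert in_main_tower('conv1/bn/mean:0')
assert not in_main_tower('towerp0/conv1/bn/mean:0')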
tensorpack/train/base.py

@@ -39,7 +39,8 @@ class Trainer(object):
         assert isinstance(config, TrainConfig), type(config)
         self.config = config
         self.model = config.model
-        self.extra_threads_procs = config.extra_threads_procs
+        self.model.get_input_vars()   # ensure they are present
+        self._extra_threads_procs = config.extra_threads_procs
 
     @abstractmethod
     def train(self):

@@ -53,7 +54,7 @@ class Trainer(object):
     @abstractmethod
     def get_predict_func(self, input_names, output_names):
-        """ return a predictor function"""
+        """ return a online predictor"""
         pass
 
     def get_predict_funcs(self, input_names, output_names, n):

@@ -61,8 +62,7 @@ class Trainer(object):
         Can be overwritten by subclasses to exploit more
         parallelism among funcs.
         """
-        return [self.get_predict_func(input_name, output_names)
-                for k in range(n)]
+        return [self.get_predict_func(input_name, output_names) for k in range(n)]
 
     def trigger_epoch(self):
         self._trigger_epoch()

@@ -156,7 +156,7 @@ class Trainer(object):
         with self.sess.as_default():
             # avoid sigint get handled by other processes
-            start_proc_mask_signal(self.extra_threads_procs)
+            start_proc_mask_signal(self._extra_threads_procs)
 
     def process_grads(self, grads):
         g = []
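The base get_predict_funcs above simply calls get_predict_func n times; its docstring notes that subclasses may override it to exploit more parallelism. Before this commit QueueInputTrainer did exactly that by passing the index through to a tower-aware get_predict_func (visible in the removed lines of trainer.py below). A hedged sketch of that override pattern, for illustration only:

class TowerAwareTrainer(Trainer):  # hypothetical subclass of the Trainer above
    def get_predict_funcs(self, input_names, output_names, n):
        """ return n predictors, spread evenly over the predict towers """
        # assumes this class defines get_predict_func(input_names, output_names, tower),
        # as QueueInputTrainer does; k selects the tower round-robin
        return [self.get_predict_func(input_names, output_names, k)
                for k in range(n)]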
tensorpack/train/multigpu.py

@@ -59,7 +59,7 @@ class MultiGPUTrainer(QueueInputTrainer):
                     tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, cost_var)
                 tf.get_variable_scope().reuse_variables()
                 # avoid repeated summary from each device
-                backup = backup_collection(self.SUMMARY_BACKUP_KEYS)
+                backup = backup_collection(SUMMARY_BACKUP_KEYS)
         restore_collection(backup)
         return grad_list

@@ -78,9 +78,6 @@ class SyncMultiGPUTrainer(MultiGPUTrainer):
             summary_moving_average(), name='train_op')
         describe_model()
-
-        with freeze_collection(self.SUMMARY_BACKUP_KEYS):
-            self._build_predict_tower()
         # [debug]: do nothing in training
         #self.train_op = self.dequed_inputs[0][0] + self.dequed_inputs[1][0]
         self.main_loop()

@@ -107,8 +104,6 @@ class AsyncMultiGPUTrainer(MultiGPUTrainer):
         self._start_async_threads(grad_list)
 
-        with freeze_collection(self.SUMMARY_BACKUP_KEYS):
-            self._build_predict_tower()
         self.main_loop()
 
     def _start_async_threads(self, grad_list):
tensorpack/train/trainer.py

@@ -15,10 +15,42 @@ from ..tfutils.modelutils import describe_model
 from ..utils import *
 from ..tfutils import *
+from ..tfutils.summary import add_moving_summary
 from ..predict import OnlinePredictor
 
 __all__ = ['SimpleTrainer', 'QueueInputTrainer']
 
+class PredictorFactory(object):
+    """ Make predictors for a trainer"""
+    PREFIX = 'towerp'
+
+    def __init__(self, trainer, towers):
+        self.trainer = trainer
+        self.towers = towers
+        self.tower_built = False
+
+    def get_predictor(self, input_names, output_names, tower):
+        if not self.tower_built:
+            self._build_predict_tower()
+        tower = self.towers[tower % len(self.towers)]
+        raw_input_vars = get_vars_by_names(input_names)
+        output_names = ['{}{}/'.format(self.PREFIX, tower) + n for n in output_names]
+        output_vars = get_vars_by_names(output_names)
+        return OnlinePredictor(self.trainer.sess, raw_input_vars, output_vars)
+
+    def _build_predict_tower(self):
+        # build_predict_tower might get called anywhere, but 'towerp' should be the outermost name scope
+        with tf.name_scope(None), \
+                freeze_collection(SUMMARY_BACKUP_KEYS):
+            inputs = self.trainer.model.get_input_vars()
+            tf.get_variable_scope().reuse_variables()
+            for k in self.towers:
+                logger.info("Building graph for predictor tower {}...".format(k))
+                with tf.device('/gpu:{}'.format(k) if k >= 0 else '/cpu:0'), \
+                        tf.name_scope('{}{}'.format(self.PREFIX, k)):
+                    self.trainer.model.build_graph(inputs, False)
+            self.tower_built = True
+
 class SimpleTrainer(Trainer):
     def run_step(self):
         data = next(self.data_producer)

@@ -30,7 +62,7 @@ class SimpleTrainer(Trainer):
         self.input_vars = model.get_input_vars()
         model.build_graph(self.input_vars, True)
         cost_var = model.get_cost()
-        tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, cost_var)
+        add_moving_summary(cost_var)
 
         grads = self.config.optimizer.compute_gradients(cost_var)
         grads = self.process_grads(grads)

@@ -55,11 +87,9 @@ class SimpleTrainer(Trainer):
         self._process_summary(summary_str)
 
     def get_predict_func(self, input_names, output_names):
-        input_vars = get_vars_by_names(input_names)
-        for v in input_vars:
-            assert v in self.input_vars
-        output_vars = get_vars_by_names(output_names)
-        return OnlinePredictor(self.sess, input_vars, output_vars)
+        if not hasattr(self, 'predictor_factory'):
+            self.predictor_factory = PredictorFactory(self, [0])
+        return self.predictor_factory.get_predictor(input_names, output_names, 0)
 
 class EnqueueThread(threading.Thread):
     def __init__(self, trainer):

@@ -102,8 +132,6 @@ class EnqueueThread(threading.Thread):
 class QueueInputTrainer(Trainer):
     """ Single GPU Trainer, takes input from a queue"""
 
-    SUMMARY_BACKUP_KEYS = [tf.GraphKeys.SUMMARIES, MOVING_SUMMARY_VARS_KEY]
-
     def __init__(self, config, input_queue=None, predict_tower=None):
         """
         :param config: a `TrainConfig` instance

@@ -120,10 +148,12 @@ class QueueInputTrainer(Trainer):
                 50, [x.dtype for x in self.input_vars], name='input_queue')
         else:
             self.input_queue = input_queue
 
         if predict_tower is None:
             # by default, use the first training gpu for prediction
             predict_tower = [0]
-        self.predict_tower = predict_tower
+        self.predictor_factory = PredictorFactory(self, predict_tower)
         self.dequed_inputs = None
 
     def _get_model_inputs(self):

@@ -136,15 +166,6 @@ class QueueInputTrainer(Trainer):
             qv.set_shape(v.get_shape())
         return ret
 
-    def _build_predict_tower(self):
-        inputs = self.model.get_input_vars()
-        tf.get_variable_scope().reuse_variables()
-        for k in self.predict_tower:
-            logger.info("Building graph for predict tower p{}...".format(k))
-            with tf.device('/gpu:{}'.format(k) if k >= 0 else '/cpu:0'), \
-                    tf.name_scope('towerp{}'.format(k)):
-                self.model.build_graph(inputs, False)
-
     def _single_tower_grad(self):
         """ Get grad and cost for single-tower"""
         self.dequed_inputs = model_inputs = self._get_model_inputs()

@@ -158,13 +179,13 @@ class QueueInputTrainer(Trainer):
         cost_var = self.model.get_cost()
         grads = self.config.optimizer.compute_gradients(cost_var, gate_gradients=0)  # GATE_NONE
-        tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, cost_var)
+        add_moving_summary(cost_var)
         return grads
 
     def _build_enque_thread(self):
         """ create a thread that keeps filling the queue """
         self.input_th = EnqueueThread(self)
-        self.extra_threads_procs.append(self.input_th)
+        self._extra_threads_procs.append(self.input_th)
 
     def train(self):
         assert self.config.nr_tower == 1, \

@@ -176,9 +197,6 @@ class QueueInputTrainer(Trainer):
         grads = self.process_grads(grads)
         describe_model()
 
-        with freeze_collection(self.SUMMARY_BACKUP_KEYS):
-            self._build_predict_tower()
         self.train_op = tf.group(
             self.config.optimizer.apply_gradients(grads, get_global_step_var()),
             summary_moving_average(), name='train_op')

@@ -213,14 +231,5 @@ class QueueInputTrainer(Trainer):
         :param tower: return the kth predict_func
         :returns: an `OnlinePredictor`
         """
-        tower = self.predict_tower[tower % len(self.predict_tower)]
-        raw_input_vars = get_vars_by_names(input_names)
-        output_names = ['towerp{}/'.format(tower) + n for n in output_names]
-        output_vars = get_vars_by_names(output_names)
-        return OnlinePredictor(self.sess, raw_input_vars, output_vars)
-
-    def get_predict_funcs(self, input_names, output_names, n):
-        """ return n predictors evenly on each predict_tower"""
-        return [self.get_predict_func(input_names, output_names, k)
-                for k in range(n)]
+        return self.predictor_factory.get_predictor(input_names, output_names, tower)
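The net effect of this file: both trainers now share one PredictorFactory that builds the 'towerp' prediction graph lazily, on the first get_predictor call, instead of each trainer building it eagerly inside train(). A toy, self-contained illustration of that lazy-build pattern (illustration only, not the repository's code):

class LazyFactory(object):
    """Toy version of the lazy-build pattern used by PredictorFactory above:
    the expensive graph construction happens once, on first request."""
    def __init__(self):
        self.tower_built = False

    def _build(self):
        print('building predict tower (expensive, happens once)')
        self.tower_built = True

    def get(self, name):
        if not self.tower_built:
            self._build()
        return 'towerp0/' + name   # same prefixing scheme as PREFIX above

f = LazyFactory()
print(f.get('prob'))   # triggers the build, returns 'towerp0/prob'
print(f.get('prob'))   # no rebuild on later calls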
tensorpack/utils/naming.py

@@ -9,6 +9,9 @@ GLOBAL_STEP_VAR_NAME = 'global_step:0'
 MOVING_SUMMARY_VARS_KEY = 'MOVING_SUMMARY_VARIABLES'
 
 INPUT_VARS_KEY = 'INPUT_VARIABLES'
 
+import tensorflow as tf
+SUMMARY_BACKUP_KEYS = [tf.GraphKeys.SUMMARIES, MOVING_SUMMARY_VARS_KEY]
+
 # export all upper case variables
 all_local_names = locals().keys()
 __all__ = [x for x in all_local_names if x.isupper()]
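SUMMARY_BACKUP_KEYS moves here so trainer.py and multigpu.py can reference the same module-level list when backing up or freezing summary collections (backup_collection, restore_collection and freeze_collection in the hunks above). A minimal sketch of the backup/restore idea on plain TF collections; this is an illustration of the concept using only the public TF 1.x collection API, not the tensorpack helpers themselves:

import tensorflow as tf  # TF 1.x graph-mode API assumed

keys = [tf.GraphKeys.SUMMARIES]  # SUMMARY_BACKUP_KEYS also includes the moving-summary key

# backup: remember the current contents and empty the live collections, so that
# graph building done afterwards (e.g. the predict towers) cannot add duplicate summaries
backup = {k: list(tf.get_collection(k)) for k in keys}
for k in keys:
    del tf.get_collection_ref(k)[:]

# ... build extra towers here ...

# restore: put the original contents back
for k, items in backup.items():
    ref = tf.get_collection_ref(k)
    del ref[:]
    ref.extend(items)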