Commit 977134e1 authored Jan 02, 2016 by Yuxin Wu

after_train

parent 696a7db7

Showing 8 changed files with 84 additions and 88 deletions (+84 -88)
example_cifar10.py                        +13   -7
tensorpack/callbacks/base.py               +7   -5
tensorpack/callbacks/common.py             +3   -0
tensorpack/callbacks/group.py             +16   -4
tensorpack/dataflow/imgaug/imgproc.py      +2   -3
tensorpack/train.py                       +42  -57
tensorpack/utils/__init__.py               +1   -2
tensorpack/utils/concurrency.py            +0  -10
example_cifar10.py
 #!/usr/bin/env python2
 # -*- coding: UTF-8 -*-
-# File: loyaltry.py
+# File: example_cifar10.py
 # Author: Yuxin Wu <ppwwyyxx@gmail.com>
 import tensorflow as tf
...
...
@@ -16,10 +16,15 @@ from tensorpack.utils.symbolic_functions import *
 from tensorpack.utils.summary import *
 from tensorpack.dataflow import *
 from tensorpack.dataflow import imgaug
 from cifar10 import cifar10

 """
 This config follows the same preprocessing/model/hyperparemeters as in
 tensorflow cifar10 examples. (https://tensorflow.googlesource.com/tensorflow/+/master/tensorflow/models/image/cifar10/)
 But it's faster.
 """

 BATCH_SIZE = 128
-MIN_AFTER_DEQUEUE = 20000   # a large number, as in the official example
+MIN_AFTER_DEQUEUE = int(50000 * 0.4)
 CAPACITY = MIN_AFTER_DEQUEUE + 3 * BATCH_SIZE

 def get_model(inputs, is_training):
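Editor's note: the rewritten constant evaluates to the same number as the old literal, int(50000 * 0.4) = 20000 (40% of the 50,000-image CIFAR-10 training set), so CAPACITY = 20000 + 3 * 128 = 20384; the change documents where the magic number comes from rather than altering queue sizing.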
...
...
@@ -50,7 +55,7 @@ def get_model(inputs, is_training):
     l = FullyConnected('fc1', l, out_dim=192,
                        W_init=tf.truncated_normal_initializer(stddev=0.04),
                        b_init=tf.constant_initializer(0.1))
-    # fc will have activation summary by default. disable this for the output layer
+    # fc will have activation summary by default. disable for the output layer
     logits = FullyConnected('linear', l, out_dim=10, summary_activation=False,
                             nl=tf.identity,
                             W_init=tf.truncated_normal_initializer(stddev=1.0 / 192))
...
...
@@ -91,14 +96,14 @@ def get_config():
         Flip(horiz=True),
         BrightnessAdd(63),
         Contrast((0.2, 1.8)),
-        PerImageWhitening(all_channel=True)
+        MeanVarianceNormalize(all_channel=True)
     ]
     dataset_train = AugmentImageComponent(dataset_train, augmentors)
     dataset_train = BatchData(dataset_train, 128)
     augmentors = [
         CenterCrop((24, 24)),
-        PerImageWhitening(all_channel=True)
+        MeanVarianceNormalize(all_channel=True)
     ]
     dataset_test = dataset.Cifar10('test')
     dataset_test = AugmentImageComponent(dataset_test, augmentors)
...
...
@@ -107,7 +112,6 @@ def get_config():
     sess_config = get_default_sess_config()
     sess_config.gpu_options.per_process_gpu_memory_fraction = 0.5
-    sess_config.device_count['GPU'] = 2

     # prepare model
     input_vars = [
...
...
@@ -150,6 +154,8 @@ if __name__ == '__main__':
     args = parser.parse_args()
     if args.gpu:
         os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
+    else:
+        os.environ['CUDA_VISIBLE_DEVICES'] = '0'

     with tf.Graph().as_default():
         with tf.device('/cpu:0'):
...
...
tensorpack/callbacks/base.py
...
...
@@ -31,13 +31,15 @@ class Callback(object):
         Called before starting iterative training
         """

-    def trigger_step(self, inputs, outputs, cost):
+    def after_train(self):
+        """
+        Called after training
+        """
+
+    def trigger_step(self):
         """
         Callback to be triggered after every step (every backpropagation)
-        Args:
-            inputs: the list of input values
-            outputs: list of output values after running this inputs
-            cost: the cost value after running this input
         Could be useful to apply some tricks on parameters (clipping, low-rank, etc)
         """

     def trigger_epoch(self):
...
...
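Editor's note: with the new hook in place, a user callback can pair per-run setup in before_train with cleanup in after_train. A minimal illustrative sketch (the ValidationLogger name and its file handling are assumptions for illustration, not part of this commit):

    from tensorpack.callbacks.base import Callback

    class ValidationLogger(Callback):
        # illustrative only: acquire a resource for the whole run, release it at the end
        def before_train(self):
            self.log_file = open('val.log', 'w')

        def trigger_epoch(self):
            self.log_file.write('epoch finished\n')

        def after_train(self):
            self.log_file.close()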
tensorpack/callbacks/common.py
...
...
@@ -56,3 +56,6 @@ class SummaryWriter(Callback):
             logger.info('{}: {:.4f}'.format(val.tag, val.simple_value))
         self.writer.add_summary(summary, get_global_step())

+    def after_train(self):
+        self.writer.close()
tensorpack/callbacks/group.py
...
...
@@ -78,9 +78,13 @@ class TrainCallbacks(Callback):
             cb.before_train()
         self.writer = tf.get_collection(SUMMARY_WRITER_COLLECTION_KEY)[0]

-    def trigger_step(self, inputs, outputs, cost):
+    def after_train(self):
         for cb in self.cbs:
-            cb.trigger_step(inputs, outputs, cost)
+            cb.after_train()
+
+    def trigger_step(self):
+        for cb in self.cbs:
+            cb.trigger_step()

     def trigger_epoch(self):
         tm = CallbackTimeLogger()
...
@@ -111,6 +115,10 @@ class TestCallbacks(Callback):
         for cb in self.cbs:
             cb.before_train()

+    def after_train(self):
+        for cb in self.cbs:
+            cb.after_train()
+
     def trigger_epoch(self):
         if not self.cbs:
             return
...
...
@@ -153,8 +161,12 @@ class Callbacks(Callback):
         self.train.before_train()
         self.test.before_train()

-    def trigger_step(self, inputs, outputs, cost):
-        self.train.trigger_step(inputs, outputs, cost)
+    def after_train(self):
+        self.train.after_train()
+        self.test.after_train()
+
+    def trigger_step(self):
+        self.train.trigger_step()
         # test callback don't have trigger_step

     def trigger_epoch(self):
...
...
tensorpack/dataflow/imgaug/imgproc.py
...
...
@@ -6,7 +6,7 @@
 from .base import ImageAugmentor
 import numpy as np

-__all__ = ['BrightnessAdd', 'Contrast', 'PerImageWhitening']
+__all__ = ['BrightnessAdd', 'Contrast', 'MeanVarianceNormalize']

 class BrightnessAdd(ImageAugmentor):
     """
...
...
@@ -35,7 +35,7 @@ class Contrast(ImageAugmentor):
         img.arr = (arr - mean) * r + mean
         img.arr = np.clip(img.arr, 0, 255)

-class PerImageWhitening(ImageAugmentor):
+class MeanVarianceNormalize(ImageAugmentor):
     """
     Linearly scales image to have zero mean and unit norm.
     x = (x - mean) / adjusted_stddev
...
...
@@ -43,7 +43,6 @@ class PerImageWhitening(ImageAugmentor):
"""
def
__init__
(
self
,
all_channel
=
True
):
self
.
all_channel
=
all_channel
pass
def
_augment
(
self
,
img
):
if
self
.
all_channel
:
...
...
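Editor's note: the renamed augmentor's docstring keeps the formula x = (x - mean) / adjusted_stddev. A rough NumPy sketch of that computation for the all_channel=True case (the floor of 1/sqrt(num_pixels) on the stddev mirrors TensorFlow's per-image whitening and is an assumption here, since the method body is not shown in this hunk):

    import numpy as np

    def mean_variance_normalize(img):
        # sketch: zero mean and (roughly) unit variance over all channels jointly
        img = img.astype('float32')
        mean, std = img.mean(), img.std()
        adjusted_std = max(std, 1.0 / np.sqrt(img.size))   # assumed floor, avoids division by zero
        return (img - mean) / adjusted_std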
tensorpack/train.py
...
...
@@ -10,7 +10,7 @@ import argparse
 import tqdm
 from utils import *
-from utils.concurrency import EnqueueThread, coordinator_guard
+from utils.concurrency import EnqueueThread
 from callbacks import *
 from utils.summary import summary_moving_average
 from utils.modelutils import describe_model
...
...
@@ -75,29 +75,12 @@ class TrainConfig(object):
     assert len(kwargs) == 0, 'Unknown arguments: {}'.format(str(kwargs.keys()))

 def average_gradients(tower_grads):
-    average_grads = []
-    for grad_and_vars in zip(*tower_grads):
-        # Note that each grad_and_vars looks like the following:
-        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
-        grads = []
-        for g, _ in grad_and_vars:
-            # Add 0 dimension to the gradients to represent the tower.
-            expanded_g = tf.expand_dims(g, 0)
-            # Append on a 'tower' dimension which we will average over below.
-            grads.append(expanded_g)
-        # Average over the 'tower' dimension.
-        grad = tf.concat(0, grads)
-        grad = tf.reduce_mean(grad, 0)
-        # Keep in mind that the Variables are redundant because they are shared
-        # across towers. So .. we will just return the first tower's pointer to
-        # the Variable.
-        v = grad_and_vars[0][1]
-        grad_and_var = (grad, v)
-        average_grads.append(grad_and_var)
-    return average_grads
+    average_grads = []
+    for grad_and_vars in zip(*tower_grads):
+        grad = tf.add_n([x[0] for x in grad_and_vars]) / float(len(tower_grads))
+        v = grad_and_vars[0][1]
+        average_grads.append((grad, v))
+    return average_grads

 def summary_grads(grads):
     for grad, var in grads:
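Editor's note: the rewritten average_gradients produces the same per-variable tower average with a single add_n instead of the expand_dims / concat / reduce_mean round trip. A tiny NumPy check of the equivalence (toy gradients, illustrative only, not from the repository):

    import numpy as np

    # one gradient for the same variable from each of three towers
    tower_grads = [np.array([1.0, 2.0]), np.array([3.0, 4.0]), np.array([5.0, 6.0])]

    old_style = np.stack(tower_grads, axis=0).mean(axis=0)    # expand_dims + concat + reduce_mean
    new_style = sum(tower_grads) / float(len(tower_grads))    # add_n / number of towers

    assert np.allclose(old_style, new_style)                  # both give [3., 4.]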
...
...
@@ -139,17 +122,17 @@ def start_train(config):
     kept_summaries = {}
     grads = []
     for i in range(config.nr_tower):
-        with tf.device('/gpu:{}'.format(i)):
-            with tf.name_scope('tower{}'.format(i)) as scope:
-                model_inputs = get_model_inputs()
-                output_vars, cost_var = config.get_model_func(model_inputs, is_training=True)
-                grads.append(config.optimizer.compute_gradients(cost_var))
-
-                if i == 0:
-                    tf.get_variable_scope().reuse_variables()
-                    for k in coll_keys:
-                        kept_summaries[k] = copy.copy(tf.get_collection(k))
+        with tf.device('/gpu:{}'.format(i)), \
+                tf.name_scope('tower{}'.format(i)) as scope:
+            model_inputs = get_model_inputs()
+            output_vars, cost_var = config.get_model_func(model_inputs, is_training=True)
+            grads.append(config.optimizer.compute_gradients(cost_var))
+
+            if i == 0:
+                tf.get_variable_scope().reuse_variables()
+                for k in coll_keys:
+                    kept_summaries[k] = copy.copy(tf.get_collection(k))
     for k in coll_keys:    # avoid repeating summary on multiple devices
         del tf.get_collection(k)[:]
         tf.get_collection(k).extend(kept_summaries[k])
...
...
@@ -172,29 +155,31 @@ def start_train(config):
     # start training:
     coord = tf.train.Coordinator()
     # a thread that keeps filling the queue
-    input_th = EnqueueThread(sess, coord, enqueue_op, config.dataset, input_queue)
     model_th = tf.train.start_queue_runners(sess=sess, coord=coord, daemon=True, start=True)
+    input_th = EnqueueThread(sess, coord, enqueue_op, config.dataset, input_queue)
     input_th.start()

-    with sess.as_default(), \
-            coordinator_guard(sess, coord):
-        logger.info("Start with global_step={}".format(get_global_step()))
-        callbacks.before_train()
-        for epoch in xrange(1, config.max_epoch):
-            with timed_operation('epoch {}'.format(epoch)):
-                for step in tqdm.trange(config.step_per_epoch, leave=True, mininterval=0.2):
-                    if coord.should_stop():
-                        return
-                    # TODO if no one uses trigger_step, train_op can be
-                    # faster, see: https://github.com/soumith/convnet-benchmarks/pull/67/files
-                    fetches = [train_op, cost_var] + output_vars + model_inputs
-                    results = sess.run(fetches)
-                    cost = results[1]
-                    outputs = results[2:2 + len(output_vars)]
-                    inputs = results[-len(model_inputs):]
-                    callbacks.trigger_step(inputs, outputs, cost)
-                # note that summary_op will take a data from the queue.
-                callbacks.trigger_epoch()
+    with sess.as_default():
+        try:
+            logger.info("Start training with global_step={}".format(get_global_step()))
+            callbacks.before_train()
+            for epoch in xrange(1, config.max_epoch):
+                with timed_operation('epoch {}'.format(epoch)):
+                    for step in tqdm.trange(config.step_per_epoch, leave=True, mininterval=0.2):
+                        if coord.should_stop():
+                            return
+                        sess.run([train_op])    # faster since train_op return None
+                        callbacks.trigger_step()
+                    # note that summary_op will take a data from the queue.
+                    callbacks.trigger_epoch()
+        except (KeyboardInterrupt, Exception):
+            raise
+        finally:
+            coord.request_stop()
+            queue.close(cancel_pending_enqueues=True)
+            callbacks.after_train()
+            sess.close()
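Editor's note: after this change the callback lifecycle driven by start_train is before_train, then trigger_step after every step and trigger_epoch after every epoch, and finally after_train, which the finally block guarantees even on an exception or Ctrl-C. A stripped-down sketch of that contract (run_one_step is a stand-in for sess.run([train_op]), not a tensorpack function):

    def run_training_loop(callbacks, max_epoch, step_per_epoch, run_one_step):
        # illustrative driver mirroring the order of hooks used in start_train
        try:
            callbacks.before_train()
            for epoch in range(1, max_epoch):
                for step in range(step_per_epoch):
                    run_one_step()               # stands in for sess.run([train_op])
                    callbacks.trigger_step()
                callbacks.trigger_epoch()
        finally:
            callbacks.after_train()              # guaranteed cleanup, e.g. SummaryWriter.close()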
tensorpack/utils/__init__.py
...
...
@@ -37,8 +37,7 @@ def get_default_sess_config():
     Tensorflow default session config consume too much resources
     """
     conf = tf.ConfigProto()
-    conf.device_count['GPU'] = 1
-    conf.gpu_options.per_process_gpu_memory_fraction = 0.8
+    conf.gpu_options.per_process_gpu_memory_fraction = 0.6
     conf.gpu_options.allocator_type = 'BFC'
     conf.allow_soft_placement = True
     return conf
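Editor's note: since get_default_sess_config returns a plain tf.ConfigProto, callers can still override individual fields, as example_cifar10.py does above with per_process_gpu_memory_fraction. A minimal usage sketch (the import path assumes the function stays exported from tensorpack.utils, as this file suggests):

    import tensorflow as tf
    from tensorpack.utils import get_default_sess_config

    sess_config = get_default_sess_config()
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.5   # tighten for a shared GPU
    sess = tf.Session(config=sess_config)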
...
...
tensorpack/utils/concurrency.py
...
...
@@ -51,13 +51,3 @@ class EnqueueThread(threading.Thread):
             logger.exception("Exception in EnqueueThread:")
             self.queue.close(cancel_pending_enqueues=True)
             self.coord.request_stop()
-
-@contextmanager
-def coordinator_guard(sess, coord):
-    try:
-        yield
-    except (KeyboardInterrupt, Exception) as e:
-        raise
-    finally:
-        coord.request_stop()
-        sess.close()