Shashank Suhas / seminar-breakout · Commits

Commit 8df83a93
authored Feb 21, 2017 by Yuxin Wu
some change in gradproc, and fix #158
parent 10f55570
Showing 3 changed files with 18 additions and 22 deletions (+18 / -22):

    tensorpack/tfutils/gradproc.py    +7  -17
    tensorpack/tfutils/optimizer.py   +8   -3
    tensorpack/train/multigpu.py      +3   -2
tensorpack/tfutils/gradproc.py  (view file @ 8df83a93)

@@ -12,23 +12,9 @@ from ..utils import logger
 from .symbolic_functions import rms
 from .summary import add_moving_summary

-__all__ = ['GradientProcessor', 'FilterNoneGrad', 'GlobalNormClip', 'MapGradient',
-           'SummaryGradient', 'CheckGradient',
-           'ScaleGradient', 'apply_grad_processors']
-
-
-def apply_grad_processors(grads, gradprocs):
-    """
-    Args:
-        grads (list): list of (grad, var).
-        gradprocs (list[GradientProcessor]): gradient processors to apply.
-    Returns:
-        list: list of (grad, var) went through the processors.
-    """
-    gradprocs.insert(0, FilterNoneGrad())
-    g = grads
-    for proc in gradprocs:
-        g = proc.process(g)
-    return g
+__all__ = ['GradientProcessor', 'FilterNoneGrad', 'GlobalNormClip', 'MapGradient',
+           'SummaryGradient', 'CheckGradient',
+           'ScaleGradient']


 @six.add_metaclass(ABCMeta)

@@ -118,13 +104,17 @@ class MapGradient(GradientProcessor):

     def _process(self, grads):
         ret = []
+        matched = False
         for grad, var in grads:
             if re.match(self.regex, var.op.name):
+                matched = True
                 grad = self.func(grad, var)
                 if grad is not None:
                     ret.append((grad, var))
             else:
                 ret.append((grad, var))
+        if not matched:
+            logger.warn("[MapGradient] No match was found for regex {}.".format(self.regex))
         return ret
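The second hunk changes MapGradient so that a regex matching no variable is reported instead of silently ignored. Below is a minimal, framework-free sketch of that behavior; the MapGradientSketch class and the plain (grad, name) tuples are illustrative stand-ins, not tensorpack's real TF-based classes.

# Sketch only: mimics MapGradient._process after this commit, using plain
# (grad, name) tuples instead of TF tensors/variables.
import logging
import re

logger = logging.getLogger("gradproc_sketch")


class MapGradientSketch:
    def __init__(self, func, regex='.*'):
        self.func = func
        self.regex = regex

    def process(self, grads):
        ret = []
        matched = False
        for grad, name in grads:            # name stands in for var.op.name
            if re.match(self.regex, name):
                matched = True
                grad = self.func(grad, name)
                if grad is not None:        # returning None drops that gradient
                    ret.append((grad, name))
            else:
                ret.append((grad, name))    # untouched when the regex does not match
        if not matched:
            # New in this commit: an unmatched regex now produces a warning.
            logger.warning("[MapGradient] No match was found for regex %s.", self.regex)
        return ret


# Example: scale only "conv*" gradients; "fc0/W" passes through unchanged.
grads = [(4.0, 'conv0/W'), (2.0, 'fc0/W')]
print(MapGradientSketch(lambda g, name: g * 0.5, regex='conv').process(grads))
# -> [(2.0, 'conv0/W'), (2.0, 'fc0/W')]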
tensorpack/tfutils/optimizer.py  (view file @ 8df83a93)

@@ -5,7 +5,6 @@
 import tensorflow as tf
 from contextlib import contextmanager
-from .gradproc import apply_grad_processors as apply_gradproc
 from .gradproc import FilterNoneGrad

 __all__ = ['apply_grad_processors', 'ProxyOptimizer',

@@ -48,13 +47,19 @@ def apply_grad_processors(opt, gradprocs):

     class _ApplyGradientProcessor(ProxyOptimizer):
         def __init__(self, opt, gradprocs):
-            self._gradprocs = gradprocs
+            self._gradprocs = [FilterNoneGrad()] + gradprocs
             super(_ApplyGradientProcessor, self).__init__(opt)

         def apply_gradients(self, grads_and_vars,
                             global_step=None, name=None):
-            g = apply_gradproc(grads_and_vars, self._gradprocs)
+            g = self._apply(grads_and_vars)
             return self._opt.apply_gradients(g, global_step, name)

+        def _apply(self, g):
+            for proc in self._gradprocs:
+                g = proc.process(g)
+            return g
+
     return _ApplyGradientProcessor(opt, gradprocs)
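With the module-level apply_grad_processors removed from gradproc.py, the optimizer-side wrapper now owns the whole pipeline: it prepends FilterNoneGrad() to the processor list and runs each processor in order before delegating to the wrapped optimizer. A rough, framework-free sketch of that proxy pattern; FakeOptimizer and the *Sketch classes are stand-ins, not tensorpack's real API.

# Sketch of the wrapper built by apply_grad_processors(opt, gradprocs)
# after this commit (stand-in classes, plain Python objects).


class FilterNoneGradSketch:
    """Drop (grad, var) pairs whose gradient is None."""
    def process(self, grads):
        return [(g, v) for g, v in grads if g is not None]


class ApplyGradientProcessorSketch:
    def __init__(self, opt, gradprocs):
        # As in the commit: a fresh list with FilterNoneGrad() prepended,
        # rather than mutating the caller's gradprocs in place.
        self._opt = opt
        self._gradprocs = [FilterNoneGradSketch()] + list(gradprocs)

    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        g = self._apply(grads_and_vars)
        return self._opt.apply_gradients(g, global_step, name)

    def _apply(self, g):
        # Thread the (grad, var) list through every processor in order.
        for proc in self._gradprocs:
            g = proc.process(g)
        return g


class FakeOptimizer:
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        return grads_and_vars          # a real optimizer would build an update op


opt = ApplyGradientProcessorSketch(FakeOptimizer(), gradprocs=[])
print(opt.apply_gradients([(1.0, 'W'), (None, 'b')]))   # -> [(1.0, 'W')]

Building a new list here also avoids the side effect the removed helper had, where gradprocs.insert(0, FilterNoneGrad()) modified the list the caller passed in.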
tensorpack/train/multigpu.py  (view file @ 8df83a93)

@@ -13,7 +13,7 @@ from ..utils.naming import SUMMARY_BACKUP_KEYS
 from ..utils.concurrency import LoopThread
 from ..tfutils.tower import TowerContext
 from ..tfutils.collection import backup_collection, restore_collection
-from ..tfutils.gradproc import apply_grad_processors, ScaleGradient
+from ..tfutils.gradproc import FilterNoneGrad, ScaleGradient
 from .base import Trainer
 from .feedfree import SingleCostFeedfreeTrainer

@@ -190,11 +190,12 @@ class AsyncMultiGPUTrainer(MultiGPUTrainer,
         super(AsyncMultiGPUTrainer, self)._setup()
         grad_list = MultiGPUTrainer._multi_tower_grads(
             self.config.tower, lambda: self._get_cost_and_grad()[1])
+        grad_list = FilterNoneGrad().process(grad_list)
         if self._scale_gradient and self.config.nr_tower > 1:
             # pretend to average the grads, in order to make async and
             # sync have consistent effective learning rate
             gradproc = ScaleGradient(('.*', 1.0 / self.config.nr_tower), log=False)
-            grad_list = apply_grad_processors(grad_list, [gradproc])
+            grad_list = gradproc.process(grad_list)

         # use grad from the first tower for iteration in main thread
         self.train_op = self.config.optimizer.apply_gradients(
             grad_list[0], name='min_op')
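In the trainer, the gradient processors are now invoked directly through their process() method rather than via the removed gradproc.apply_grad_processors helper. The ScaleGradient step only "pretends" to average: with nr_tower towers each applying its own asynchronous update, dividing every gradient by nr_tower keeps the effective learning rate roughly comparable to synchronous averaging. A small numeric sketch of that scaling, using plain floats and a hypothetical scale() helper instead of ScaleGradient:

# Sketch only: illustrates the 1 / nr_tower scaling, not tensorpack's classes.
nr_tower = 4
tower0_grads = [(4.0, 'W'), (8.0, 'b')]     # (grad, var-name) pairs from one tower


def scale(grads, factor):
    """Stand-in for ScaleGradient(('.*', factor)): multiply every gradient."""
    return [(g * factor, name) for g, name in grads]


# Four towers each applying updates of this size sum to roughly one update of
# the un-scaled gradient, matching a single synchronous averaged update.
print(scale(tower0_grads, 1.0 / nr_tower))   # -> [(1.0, 'W'), (2.0, 'b')]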