Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
seminar-breakout
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shashank Suhas
seminar-breakout
Commits
68edaa0c
Commit
68edaa0c
authored
Feb 14, 2018
by
Yuxin Wu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix #653, fix #655
parent
3398df09
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
17 additions
and
8 deletions
+17
-8
examples/ResNet/imagenet-resnet.py
examples/ResNet/imagenet-resnet.py
+4
-3
tensorpack/input_source/input_source.py
tensorpack/input_source/input_source.py
+2
-1
tensorpack/train/trainers.py
tensorpack/train/trainers.py
+5
-2
tensorpack/utils/serialize.py
tensorpack/utils/serialize.py
+6
-2
No files found.
examples/ResNet/imagenet-resnet.py
View file @
68edaa0c
...
...
@@ -61,6 +61,7 @@ def get_data(name, batch):
 def get_config(model, fake=False):
     nr_tower = max(get_nr_gpu(), 1)
+    assert args.batch % nr_tower == 0
     batch = args.batch // nr_tower
     if fake:
...
...
@@ -73,14 +74,14 @@ def get_config(model, fake=False):
         dataset_train = get_data('train', batch)
         dataset_val = get_data('val', batch)
-        BASE_LR = 0.1 * (args.batch // 256)
+        BASE_LR = 0.1 * (args.batch / 256.0)
         callbacks = [
             ModelSaver(),
             ScheduledHyperParamSetter(
                 'learning_rate', [(30, BASE_LR * 1e-1), (60, BASE_LR * 1e-2),
                                   (85, BASE_LR * 1e-3), (95, BASE_LR * 1e-4), (105, BASE_LR * 1e-5)]),
         ]
-        if BASE_LR != 0.1:
+        if BASE_LR > 0.1:
             callbacks.append(
                 ScheduledHyperParamSetter(
                     'learning_rate', [(0, 0.1), (3, BASE_LR)], interp='linear'))
...
...
@@ -115,7 +116,7 @@ if __name__ == '__main__':
     parser.add_argument('-d', '--depth', help='resnet depth',
                         type=int, default=18, choices=[18, 34, 50, 101, 152])
     parser.add_argument('--eval', action='store_true')
-    parser.add_argument('--batch', help='total batch size. need to be multiple of 256 to get similar accuracy.',
+    parser.add_argument('--batch', help='total batch size. 256 gives best accuracy.',
                         default=256, type=int)
     parser.add_argument('--mode', choices=['resnet', 'preact', 'se'],
                         help='variants of resnet to use', default='resnet')
...
...
tensorpack/input_source/input_source.py
View file @
68edaa0c
...
...
@@ -493,9 +493,10 @@ class StagingInput(FeedfreeInput):
                 fetches=[self.stage_op, unstage_op])
         def _prefill(self):
-            logger.info("Pre-filling staging area ...")
+            logger.info("Pre-filling StagingArea ...")
             for k in range(self.nr_stage):
                 self.stage_op.run()
+            logger.info("Put {} element(s) to StagingArea.")
         def _before_run(self, ctx):
             # This has to happen once, right before the first iteration.
...
...
tensorpack/train/trainers.py
View file @
68edaa0c
...
...
@@ -4,6 +4,7 @@
 import os
 import tensorflow as tf
+import multiprocessing as mp
 from ..callbacks import RunOp
 from ..tfutils.sesscreate import NewSessionCreator
...
...
@@ -339,8 +340,10 @@ class HorovodTrainer(SingleCostTrainer):
         # NOTE It will fail if GPU was already detected before initializing the session
         # https://github.com/tensorflow/tensorflow/issues/8136
         session_creator.config.gpu_options.visible_device_list = str(self._local_rank)
-        # TODO split #CPUs
-        # session_creator.config.inter_op_parallelism_threads =
+        try:
+            session_creator.config.inter_op_parallelism_threads = mp.cpu_count() // hvd.local_size()
+        except AttributeError:
+            pass
         super(HorovodTrainer, self).initialize(session_creator, session_init)
...
...
tensorpack/utils/serialize.py
View file @
68edaa0c
...
...
@@ -8,11 +8,15 @@ import msgpack_numpy
 msgpack_numpy.patch()
 try:
-    # https://github.com/apache/arrow/pull/1223#issuecomment-359895666
     import sys
+    old_mod = sys.modules.get('torch', None)
     sys.modules['torch'] = None
+    # https://github.com/apache/arrow/pull/1223#issuecomment-359895666
     import pyarrow as pa
-    del sys.modules['torch']
+    if old_mod is not None:
+        sys.modules['torch'] = old_mod
+    else:
+        del sys.modules['torch']
 except ImportError:
     pa = None
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment