Shashank Suhas / seminar-breakout · Commits

Commit 209da29e, authored Aug 21, 2018 by Yuxin Wu
[DQN] fix the layout when channel>1
Parent: c5fd5e17

Showing 10 changed files with 37 additions and 27 deletions.
examples/DeepQNetwork/DQN.py            +0  -1
examples/DeepQNetwork/DQNModel.py       +11 -7
examples/DeepQNetwork/atari_wrapper.py  +4  -1
examples/DeepQNetwork/expreplay.py      +9  -7
examples/FasterRCNN/README.md           +7  -4
examples/FasterRCNN/config.py           +1  -1
examples/FasterRCNN/model_frcnn.py      +1  -1
examples/FasterRCNN/model_mrcnn.py      +3  -3
examples/FasterRCNN/train.py            +0  -1
tensorpack/dataflow/common.py           +1  -1
examples/DeepQNetwork/DQN.py

```diff
@@ -53,7 +53,6 @@ class Model(DQNModel):
         super(Model, self).__init__(IMAGE_SIZE, 1, FRAME_HISTORY, METHOD, NUM_ACTIONS, GAMMA)

     def _get_DQN_prediction(self, image):
-        """ image: [N, H, W, C * history] in [0,255]"""
         image = image / 255.0
         with argscope(Conv2D, activation=lambda x: PReLU('prelu', x), use_bias=True):
             l = (LinearWrap(image)
```
examples/DeepQNetwork/DQNModel.py

```diff
@@ -15,10 +15,12 @@ class Model(ModelDesc):
     learning_rate = 1e-3

     def __init__(self, image_shape, channel, history, method, num_actions, gamma):
+        assert len(image_shape) == 2, image_shape
         self.channel = channel
-        self._shape2d = image_shape
-        self._shape3d = image_shape + (channel, )
-        self._shape4d_for_prediction = (-1, ) + image_shape + (channel * history, )
+        self._shape2d = tuple(image_shape)
+        self._shape3d = self._shape2d + (channel, )
+        self._shape4d_for_prediction = (-1, ) + self._shape2d + (history * channel, )
         self._channel = channel
         self.history = history
         self.method = method
@@ -31,7 +33,7 @@ class Model(ModelDesc):
         # The first h are the current state, and the last h are the next state.
         return [tf.placeholder(tf.uint8,
-                               (None,) + self._shape2d + (self._channel * (self.history + 1),),
+                               (None,) + self._shape2d + ((self.history + 1) * self.channel,),
                                'comb_state'),
                 tf.placeholder(tf.int64, (None,), 'action'),
                 tf.placeholder(tf.float32, (None,), 'reward'),
@@ -43,20 +45,22 @@ class Model(ModelDesc):
     @auto_reuse_variable_scope
     def get_DQN_prediction(self, image):
+        """ image: [N, H, W, history * C] in [0,255]"""
         return self._get_DQN_prediction(image)

     def build_graph(self, comb_state, action, reward, isOver):
         comb_state = tf.cast(comb_state, tf.float32)
-        comb_state = tf.reshape(comb_state, [-1] + list(self._shape3d) + [self.history + 1])
+        comb_state = tf.reshape(comb_state, [-1] + list(self._shape2d) + [self.history + 1, self.channel])

-        state = tf.slice(comb_state, [0, 0, 0, 0, 0], [-1, -1, -1, -1, self.history])
+        state = tf.slice(comb_state, [0, 0, 0, 0, 0], [-1, -1, -1, self.history, -1])
         state = tf.reshape(state, self._shape4d_for_prediction, name='state')
         self.predict_value = self.get_DQN_prediction(state)
         if not get_current_tower_context().is_training:
             return

         reward = tf.clip_by_value(reward, -1, 1)
-        next_state = tf.slice(comb_state, [0, 0, 0, 0, 1], [-1, -1, -1, -1, self.history], name='next_state')
+        next_state = tf.slice(comb_state, [0, 0, 0, 1, 0], [-1, -1, -1, self.history, -1], name='next_state')
         next_state = tf.reshape(next_state, self._shape4d_for_prediction)
         action_onehot = tf.one_hot(action, self.num_actions, 1.0, 0.0)
```
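Why the reshape/slice grouping matters: FrameStack and the replay buffer lay out `comb_state` history-major along the last axis (all of frame 0's channels, then frame 1's, ...), so the last axis has to be split as `[history + 1, channel]` rather than `[channel, history + 1]`. A minimal NumPy sketch (hypothetical sizes, not code from this repo) contrasting the two groupings:

```python
# Minimal sketch (hypothetical sizes, not repo code): why the last axis must be
# split as [history+1, channel] once channel > 1.
import numpy as np

H, W, C, HIST = 2, 2, 3, 3
frames = [np.full((H, W, C), t) for t in range(HIST + 1)]     # frame t filled with value t
comb = np.concatenate(frames, axis=-1)                        # (H, W, (HIST+1)*C), history-major

new_layout = comb.reshape(H, W, HIST + 1, C)                  # grouping used after this commit
old_layout = comb.reshape(H, W, C, HIST + 1)                  # grouping the old code assumed

print([np.array_equal(new_layout[:, :, t, :], frames[t]) for t in range(HIST + 1)])
# [True, True, True, True]  -- every history slot is an intact frame
print(np.unique(old_layout[:, :, :, 0]))
# [0 1 2]  -- the "frame 0" slot actually mixes several frames when C > 1
```

With `channel == 1` the two groupings produce the same values, which is why the old code worked for single-channel Atari frames and only broke once `channel > 1`.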
examples/DeepQNetwork/atari_wrapper.py

```diff
@@ -27,8 +27,11 @@ class MapState(gym.ObservationWrapper):
 class FrameStack(gym.Wrapper):
+    """
+    Buffer observations and stack across channels (last axis).
+    The output observation has shape (H, W, History * Channel)
+    """
     def __init__(self, env, k):
-        """Buffer observations and stack across channels (last axis)."""
         gym.Wrapper.__init__(self, env)
         self.k = k
         self.frames = deque([], maxlen=k)
```
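For reference, a standalone sketch of the stacking behaviour the new docstring describes, using only a deque and `np.concatenate` (hypothetical sizes; not the wrapper's actual code):

```python
from collections import deque
import numpy as np

k, H, W, C = 4, 2, 2, 3                              # hypothetical sizes
frames = deque([], maxlen=k)
for t in range(6):                                   # push a few (H, W, C) observations
    frames.append(np.full((H, W, C), t))

obs = np.concatenate(list(frames), axis=-1)          # (H, W, k * C), oldest frame first
print(obs.shape)                                     # (2, 2, 12)
print(obs[0, 0, ::C])                                # [2 3 4 5] -- the last k observations
```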
examples/DeepQNetwork/expreplay.py

```diff
@@ -25,7 +25,8 @@ class ReplayMemory(object):
     def __init__(self, max_size, state_shape, history_len):
         self.max_size = int(max_size)
         self.state_shape = state_shape
-        self._state_transpose = list(range(1, len(state_shape) + 1)) + [0]
+        assert len(state_shape) == 3, state_shape
+        # self._state_transpose = list(range(1, len(state_shape) + 1)) + [0]
         self._channel = state_shape[2] if len(state_shape) == 3 else 1
         self._shape3d = (state_shape[0], state_shape[1], self._channel * (history_len + 1))
         self.history_len = int(history_len)
@@ -57,7 +58,7 @@ class ReplayMemory(object):
             self._hist.append(exp)

     def recent_state(self):
-        """ return a list of (hist_len-1,) + STATE_SIZE """
+        """ return a list of ``hist_len-1`` elements, each of shape ``self.state_shape`` """
         lst = list(self._hist)
         states = [np.zeros(self.state_shape, dtype='uint8')] * (self._hist.maxlen - len(lst))
         states.extend([k.state for k in lst])
@@ -65,7 +66,7 @@ class ReplayMemory(object):
     def sample(self, idx):
-        """ return a tuple of (s,r,a,o), where s is of shape [H, W, channel * (hist_len+1)]"""
+        """ return a tuple of (s,r,a,o), where s is of shape [H, W, (hist_len+1) * channel]"""
         idx = (self._curr_pos + idx) % self._curr_size
         k = self.history_len + 1
         if idx + k <= self._curr_size:
@@ -84,14 +85,14 @@ class ReplayMemory(object):
     # the next_state is a different episode if current_state.isOver==True
     def _pad_sample(self, state, reward, action, isOver):
+        # state: Hist+1,H,W,C
         for k in range(self.history_len - 2, -1, -1):
             if isOver[k]:
                 state = copy.deepcopy(state)
                 state[:k + 1].fill(0)
                 break
         # move the first dim to the last
-        state = state.transpose(*self._state_transpose)
-        state = state.reshape(self._shape3d)
+        state = state.transpose(1, 2, 0, 3).reshape(self._shape3d)
         return (state, reward[-2], action[-2], isOver[-2])

     def _slice(self, arr, start, end):
@@ -202,10 +203,11 @@ class ExpReplay(DataFlow, Callback):
             # build a history state
             history = self.mem.recent_state()
             history.append(old_s)
-            history = np.concatenate(history, axis=-1)
+            history = np.concatenate(history, axis=-1)   # H,W,HistxC
+            history = np.expand_dims(history, axis=0)
             # assume batched network
-            q_values = self.predictor(np.expand_dims(history, 0))[0][0]  # this is the bottleneck
+            q_values = self.predictor(history)[0][0]  # this is the bottleneck
             act = np.argmax(q_values)
         self._current_ob, reward, isOver, info = self.player.step(act)
         self._current_game_score.feed(reward)
```
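The `_pad_sample` change is the buffer-side half of the same layout fix: a sampled stack arrives as `(Hist+1, H, W, C)` and must become `(H, W, (Hist+1)*C)` with each frame kept contiguous. A small NumPy check (hypothetical sizes, not repo code) that `transpose(1, 2, 0, 3)` followed by the reshape does exactly that:

```python
import numpy as np

H, W, C, HIST = 2, 2, 3, 3
stack = np.stack([np.full((H, W, C), t) for t in range(HIST + 1)])   # (HIST+1, H, W, C)

out = stack.transpose(1, 2, 0, 3).reshape(H, W, (HIST + 1) * C)      # history-major last axis
print([int(out[0, 0, t * C]) for t in range(HIST + 1)])              # [0, 1, 2, 3]
print(np.array_equal(out[..., :C], stack[0]))                        # True: frame 0 stays contiguous
```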
examples/FasterRCNN/README.md

````diff
@@ -52,18 +52,21 @@ Recommended configurations are listed in the table below.
 The code is only valid for training with 1, 2, 4 or >=8 GPUs.
 Not training with 8 GPUs may result in different performance from the table below.

+### Inference:
+
 To predict on an image (and show output in a window):
 ```
 ./train.py --predict input.jpg --load /path/to/model --config SAME-AS-TRAINING
 ```

-Evaluate the performance of a model on COCO.
-(Several trained models can be downloaded in [model zoo](http://models.tensorpack.com/FasterRCNN)):
+To Evaluate the performance of a model on COCO:
 ```
 ./train.py --evaluate output.json --load /path/to/COCO-R50C4-MaskRCNN-Standard.npz \
-    --config MODE_MASK=True DATA.BASEDIR=/path/to/COCO/DIR
+    --config SAME-AS-TRAINING
 ```

-Evaluation or prediction will need the same `--config` used during training.
+Several trained models can be downloaded in the table below. Evaluation and
+prediction will need to be run with the corresponding training configs.

 ## Results
````
examples/FasterRCNN/config.py

```diff
@@ -99,7 +99,7 @@ _C.TRAIN.BASE_LR = 1e-2  # defined for a total batch size of 8. Otherwise it wil
 _C.TRAIN.WARMUP = 1000    # in terms of iterations. This is not affected by #GPUs
 _C.TRAIN.STEPS_PER_EPOCH = 500
-# Schedule means "steps" only when total batch size is 8.
+# LR_SCHEDULE means "steps" only when total batch size is 8.
 # Otherwise the actual steps to decrease learning rate are computed from the schedule.
 # LR_SCHEDULE = [120000, 160000, 180000]      # "1x" schedule in detectron
 _C.TRAIN.LR_SCHEDULE = [240000, 320000, 360000]    # "2x" schedule in detectron
```
...
examples/FasterRCNN/model_frcnn.py
View file @
209da29e
...
@@ -49,7 +49,7 @@ def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
...
@@ -49,7 +49,7 @@ def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
Returns:
Returns:
sampled_boxes: tx4 floatbox, the rois
sampled_boxes: tx4 floatbox, the rois
sampled_labels: t labels, in [0, #class-1]. Positive means foreground.
sampled_labels: t
int64
labels, in [0, #class-1]. Positive means foreground.
fg_inds_wrt_gt: #fg indices, each in range [0, m-1].
fg_inds_wrt_gt: #fg indices, each in range [0, m-1].
It contains the matching GT of each foreground roi.
It contains the matching GT of each foreground roi.
"""
"""
...
...
examples/FasterRCNN/model_mrcnn.py

```diff
@@ -17,11 +17,11 @@ def maskrcnn_loss(mask_logits, fg_labels, fg_target_masks):
     """
     Args:
         mask_logits: #fg x #category xhxw
-        fg_labels: #fg, in 1~#class
+        fg_labels: #fg, in 1~#class, int64
         fg_target_masks: #fgxhxw, int
     """
-    num_fg = tf.size(fg_labels)
-    indices = tf.stack([tf.range(num_fg), tf.to_int32(fg_labels) - 1], axis=1)  # #fgx2
+    num_fg = tf.size(fg_labels, out_type=tf.int64)
+    indices = tf.stack([tf.range(num_fg), fg_labels - 1], axis=1)  # #fgx2
     mask_logits = tf.gather_nd(mask_logits, indices)   # #fgxhxw
     mask_probs = tf.sigmoid(mask_logits)
```
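Apart from the int64 change, the `tf.stack([...], axis=1)` + `tf.gather_nd` pattern simply picks, for each foreground ROI, the mask logits of its own category. A NumPy equivalent (hypothetical sizes, not repo code):

```python
import numpy as np

num_fg, num_category, h, w = 3, 5, 4, 4                  # hypothetical sizes
mask_logits = np.random.rand(num_fg, num_category, h, w)
fg_labels = np.array([2, 5, 1])                          # labels in 1..#class

# same selection as tf.gather_nd with indices [[0, 1], [1, 4], [2, 0]]:
picked = mask_logits[np.arange(num_fg), fg_labels - 1]   # (num_fg, h, w)
print(picked.shape)                                      # (3, 4, 4)
```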
examples/FasterRCNN/train.py

```diff
@@ -620,7 +620,6 @@ if __name__ == '__main__':
         session_init=session_init,
     )
     if is_horovod:
-        # horovod mode has the best speed for this model
         trainer = HorovodTrainer(average=False)
     else:
         # nccl mode has better speed than cpu mode
```
tensorpack/dataflow/common.py

```diff
@@ -303,7 +303,7 @@ class MapDataComponent(MapData):
             r = self._func(dp[self._index])
             if r is None:
                 return None
-            dp = copy(dp)   # shallow copy to avoid modifying the list
+            dp = copy(dp)   # shallow copy to avoid modifying the datapoint
             dp[self._index] = r
             return dp
```
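The comment tweak is cosmetic; the behaviour it describes is ordinary shallow-copy semantics. A minimal illustration (not repo code) of why the shallow copy keeps the caller's datapoint list intact:

```python
from copy import copy

dp = ['a', 'b', 'c']        # a datapoint: a list of components
dp2 = copy(dp)              # shallow copy: new list object, same component references
dp2[1] = 'B'                # replace one component in the copy only
print(dp)                   # ['a', 'b', 'c'] -- the original datapoint is unchanged
```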