Commit a6a0889a authored by Yuxin Wu

migrate to new model interface without is_training

parent faa6f17c
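
The changes below are mechanical but all follow one pattern: `_build_graph` and the helper prediction methods drop their `is_training` parameter, and training-dependent layers (BatchNorm, Dropout) are expected to discover the training flag on their own. A minimal before/after sketch of the interface change, assuming the framework exposes the flag through some ambient context rather than an explicit argument (`is_training()` below is a hypothetical lookup and `_get_prediction` an illustrative name):

    # Before: the training flag is threaded through every call site.
    class OldModel(ModelDesc):
        def _build_graph(self, inputs, is_training):
            return self._get_prediction(inputs[0], is_training)

    # After: the signature loses the flag; layers such as BatchNorm and
    # Dropout presumably call something like is_training() internally.
    class NewModel(ModelDesc):
        def _build_graph(self, inputs):
            return self._get_prediction(inputs[0])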
@@ -70,7 +70,7 @@ class Model(ModelDesc):
                 InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'next_state'),
                 InputVar(tf.bool, (None,), 'isOver') ]
 
-    def _get_DQN_prediction(self, image, is_training):
+    def _get_DQN_prediction(self, image):
         """ image: [0,255]"""
         image = image / 255.0
         with argscope(Conv2D, nl=PReLU.f, use_bias=True):
@@ -91,9 +91,9 @@ class Model(ModelDesc):
                       .FullyConnected('fc0', 512, nl=lambda x, name: LeakyReLU.f(x, 0.01, name))
                       .FullyConnected('fct', NUM_ACTIONS, nl=tf.identity)())
 
-    def _build_graph(self, inputs, is_training):
+    def _build_graph(self, inputs):
         state, action, reward, next_state, isOver = inputs
-        self.predict_value = self._get_DQN_prediction(state, is_training)
+        self.predict_value = self._get_DQN_prediction(state)
         action_onehot = tf.one_hot(action, NUM_ACTIONS, 1.0, 0.0)
         pred_action_value = tf.reduce_sum(self.predict_value * action_onehot, 1)  #N,
         max_pred_reward = tf.reduce_mean(tf.reduce_max(
@@ -101,14 +101,14 @@ class Model(ModelDesc):
         add_moving_summary(max_pred_reward)
 
         with tf.variable_scope('target'):
-            targetQ_predict_value = self._get_DQN_prediction(next_state, False)    # NxA
+            targetQ_predict_value = self._get_DQN_prediction(next_state)    # NxA
 
             # DQN
             #best_v = tf.reduce_max(targetQ_predict_value, 1)    # N,
 
             # Double-DQN
             tf.get_variable_scope().reuse_variables()
-            next_predict_value = self._get_DQN_prediction(next_state, is_training)
+            next_predict_value = self._get_DQN_prediction(next_state)
             self.greedy_choice = tf.argmax(next_predict_value, 1)   # N,
             predict_onehot = tf.one_hot(self.greedy_choice, NUM_ACTIONS, 1.0, 0.0)
             best_v = tf.reduce_sum(targetQ_predict_value * predict_onehot, 1)
...
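
One subtlety in this hunk: the target network used to be built with a hard-coded `is_training=False`, a distinction the new call site can no longer express, so it presumably moves into the framework. The Double-DQN target itself is unchanged; a small NumPy sketch (illustrative names, not code from the repo) of what the one-hot/reduce_sum combination computes:

    import numpy as np

    def double_dqn_best_v(next_q_online, next_q_target):
        # Both inputs have shape (N, NUM_ACTIONS).
        greedy = np.argmax(next_q_online, axis=1)   # action chosen by the online net
        # Value of that action under the target net -- the same quantity as
        # tf.reduce_sum(targetQ_predict_value * predict_onehot, 1) above.
        return next_q_target[np.arange(len(greedy)), greedy]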
@@ -30,7 +30,7 @@ class Model(ModelDesc):
         return [InputVar(tf.float32, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'),
                 InputVar(tf.int32, [None], 'label') ]
 
-    def _build_graph(self, input_vars, is_training):
+    def _build_graph(self, input_vars):
         image, label = input_vars
         image = image / 128.0
@@ -57,7 +57,7 @@ class Model(ModelDesc):
             outs.append(x4)
             return tf.concat(3, outs, name='concat')
 
-        with argscope(Conv2D, nl=BNReLU(is_training), use_bias=False):
+        with argscope(Conv2D, nl=BNReLU(), use_bias=False):
             l = Conv2D('conv0', image, 64, 7, stride=2)
             l = MaxPooling('pool0', l, 3, 2, padding='SAME')
             l = Conv2D('conv1', l, 64, 1)
...
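
`BNReLU` is a factory that returns a nonlinearity suitable for Conv2D's `nl` argument; after this commit it no longer takes `is_training`. A hedged sketch of what such a factory might look like (not the repo's actual source; it assumes `BatchNorm` resolves the training flag itself):

    def BNReLU(**bn_kwargs):
        # Returns an nl callable for argscope(Conv2D, nl=...).
        def nonlin(x, name=None):
            x = BatchNorm('bn', x, **bn_kwargs)  # training mode handled inside BatchNorm
            return tf.nn.relu(x, name=name)
        return nonlin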
@@ -33,7 +33,7 @@ class Model(ModelDesc):
         return [InputVar(tf.float32, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'),
                 InputVar(tf.int32, [None], 'label') ]
 
-    def _build_graph(self, input_vars, is_training):
+    def _build_graph(self, input_vars):
         image, label = input_vars
         image = image / 128.0 - 1  # ?
@@ -69,7 +69,7 @@ class Model(ModelDesc):
                       .Conv2D('conv277ba', ch_r, [7,1])
                       .Conv2D('conv277bb', ch, [1,7])())
 
-        nl = BNReLU(is_training, decay=0.9997, epsilon=1e-3)
+        nl = BNReLU(decay=0.9997, epsilon=1e-3)
         with argscope(Conv2D, nl=nl, use_bias=False):
             l = (LinearWrap(image)
                  .Conv2D('conv0', 32, 3, stride=2, padding='VALID') #299
@@ -168,7 +168,7 @@ class Model(ModelDesc):
 
         l = GlobalAvgPooling('gap', l)
         # 1x1x2048
-        l = tf.nn.dropout(l, keep_prob=0.8 if is_training else 1)
+        l = Dropout('drop', l, 0.8)
         logits = FullyConnected('linear', l, out_dim=1000, nl=tf.identity)
 
         loss1 = tf.nn.sparse_softmax_cross_entropy_with_logits(br1, label)
...
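
The dropout change applies the same idea to `tf.nn.dropout`: the old code computed the keep probability from `is_training` by hand, while the new `Dropout` layer owns that conditional. A minimal sketch, again assuming a hypothetical `is_training()` lookup:

    def Dropout(name, x, keep_prob=0.5):
        # is_training() is an assumed ambient lookup, not tensorpack's real API.
        if is_training():
            return tf.nn.dropout(x, keep_prob, name=name)
        return tf.identity(x, name=name)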
@@ -39,7 +39,7 @@ class Model(ModelDesc):
                 InputVar(tf.int32, (None,), 'action'),
                 InputVar(tf.float32, (None,), 'futurereward') ]
 
-    def _get_NN_prediction(self, image, is_training):
+    def _get_NN_prediction(self, image):
         """ image: [0,255]"""
         image = image / 255.0
         with argscope(Conv2D, nl=tf.nn.relu):
@@ -56,9 +56,9 @@ class Model(ModelDesc):
         policy = FullyConnected('fc-pi', l, out_dim=NUM_ACTIONS, nl=tf.identity)
         return policy
 
-    def _build_graph(self, inputs, is_training):
+    def _build_graph(self, inputs):
         state, action, futurereward = inputs
-        policy = self._get_NN_prediction(state, is_training)
+        policy = self._get_NN_prediction(state)
         self.logits = tf.nn.softmax(policy, name='logits')
 
 def play_one_episode(player, func, verbose=False):
...
@@ -37,7 +37,7 @@ class Model(ModelDesc):
                 InputVar(tf.int32, [None], 'label')
                ]
 
-    def _build_graph(self, input_vars, is_training):
+    def _build_graph(self, input_vars):
         image, label = input_vars
         image = image / 128.0 - 1
@@ -59,12 +59,12 @@ class Model(ModelDesc):
 
             with tf.variable_scope(name) as scope:
                 if not first:
-                    b1 = BatchNorm('bn1', l, is_training)
+                    b1 = BatchNorm('bn1', l)
                     b1 = tf.nn.relu(b1)
                 else:
                     b1 = l
                 c1 = conv('conv1', b1, out_channel, stride1)
 
-                b2 = BatchNorm('bn2', c1, is_training)
+                b2 = BatchNorm('bn2', c1)
                 b2 = tf.nn.relu(b2)
                 c2 = conv('conv2', b2, out_channel, 1)
@@ -76,7 +76,7 @@ class Model(ModelDesc):
             return l
 
         l = conv('conv0', image, 16, 1)
-        l = BatchNorm('bn0', l, is_training)
+        l = BatchNorm('bn0', l)
         l = tf.nn.relu(l)
         l = residual('res1.0', l, first=True)
         for k in range(1, self.n):
@@ -91,7 +91,7 @@ class Model(ModelDesc):
         l = residual('res3.0', l, increase_dim=True)
         for k in range(1, self.n):
             l = residual('res3.' + str(k), l)
-        l = BatchNorm('bnlast', l, is_training)
+        l = BatchNorm('bnlast', l)
         l = tf.nn.relu(l)
         # 8,c=64
         l = GlobalAvgPooling('gap', l)
...
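
The ResNet example shows how pervasive the threading was: every `BatchNorm` call carried the flag. A flag-free wrapper could be as simple as the sketch below (an assumption for illustration; `is_training()` is the same hypothetical lookup, and delegating to `tf.contrib.layers.batch_norm` is one possible implementation, not necessarily tensorpack's):

    def BatchNorm(name, x):
        # Resolve the mode internally instead of taking it as a parameter.
        return tf.contrib.layers.batch_norm(
            x, is_training=is_training(), scope=name)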
@@ -24,9 +24,8 @@ class Model(ModelDesc):
         return [InputVar(tf.float32, [None, 40, 40, 3], 'input'),
                 InputVar(tf.int32, [None], 'label') ]
 
-    def _build_graph(self, input_vars, is_training):
+    def _build_graph(self, input_vars):
         image, label = input_vars
-        keep_prob = tf.constant(0.5 if is_training else 1.0)
 
         image = image / 128.0 - 1
@@ -37,7 +36,7 @@ class Model(ModelDesc):
                     .Conv2D('conv3', 32, 3, padding='VALID')
                     .MaxPooling('pool2', 2, padding='SAME')
                     .Conv2D('conv4', 64, 3, padding='VALID')
-                    .tf.nn.dropout(keep_prob)
+                    .Dropout('drop', 0.5)
                     .FullyConnected('fc0', 512,
                                     b_init=tf.constant_initializer(0.1))
                     .FullyConnected('linear', out_dim=10, nl=tf.identity)())
...
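
`.Dropout('drop', 0.5)` works because `LinearWrap` forwards attribute access to registered layers, inserting the wrapped tensor as the first tensor argument; previously the chain had to reach into `tf.nn.dropout` with a manually built `keep_prob` constant. The tail of the chain presumably expands to the explicit calls below (a hedged expansion, not code from the repo):

    l = Conv2D('conv4', l, 64, 3, padding='VALID')
    l = Dropout('drop', l, 0.5)   # the keep_prob constant is gone entirely
    l = FullyConnected('fc0', l, 512,
                       b_init=tf.constant_initializer(0.1))
    logits = FullyConnected('linear', l, out_dim=10, nl=tf.identity)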