Commit a6a0889a authored by Yuxin Wu

migrate to new model interface without is_training

parent faa6f17c
......@@ -70,7 +70,7 @@ class Model(ModelDesc):
InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'next_state'),
InputVar(tf.bool, (None,), 'isOver') ]
def _get_DQN_prediction(self, image, is_training):
def _get_DQN_prediction(self, image):
""" image: [0,255]"""
image = image / 255.0
with argscope(Conv2D, nl=PReLU.f, use_bias=True):
......@@ -91,9 +91,9 @@ class Model(ModelDesc):
.FullyConnected('fc0', 512, nl=lambda x, name: LeakyReLU.f(x, 0.01, name))
.FullyConnected('fct', NUM_ACTIONS, nl=tf.identity)())
def _build_graph(self, inputs, is_training):
def _build_graph(self, inputs):
state, action, reward, next_state, isOver = inputs
self.predict_value = self._get_DQN_prediction(state, is_training)
self.predict_value = self._get_DQN_prediction(state)
action_onehot = tf.one_hot(action, NUM_ACTIONS, 1.0, 0.0)
pred_action_value = tf.reduce_sum(self.predict_value * action_onehot, 1) #N,
max_pred_reward = tf.reduce_mean(tf.reduce_max(
......@@ -101,14 +101,14 @@ class Model(ModelDesc):
add_moving_summary(max_pred_reward)
with tf.variable_scope('target'):
targetQ_predict_value = self._get_DQN_prediction(next_state, False) # NxA
targetQ_predict_value = self._get_DQN_prediction(next_state) # NxA
# DQN
#best_v = tf.reduce_max(targetQ_predict_value, 1) # N,
# Double-DQN
tf.get_variable_scope().reuse_variables()
next_predict_value = self._get_DQN_prediction(next_state, is_training)
next_predict_value = self._get_DQN_prediction(next_state)
self.greedy_choice = tf.argmax(next_predict_value, 1) # N,
predict_onehot = tf.one_hot(self.greedy_choice, NUM_ACTIONS, 1.0, 0.0)
best_v = tf.reduce_sum(targetQ_predict_value * predict_onehot, 1)
......
......@@ -30,7 +30,7 @@ class Model(ModelDesc):
return [InputVar(tf.float32, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'),
InputVar(tf.int32, [None], 'label') ]
def _build_graph(self, input_vars, is_training):
def _build_graph(self, input_vars):
image, label = input_vars
image = image / 128.0
......@@ -57,7 +57,7 @@ class Model(ModelDesc):
outs.append(x4)
return tf.concat(3, outs, name='concat')
with argscope(Conv2D, nl=BNReLU(is_training), use_bias=False):
with argscope(Conv2D, nl=BNReLU(), use_bias=False):
l = Conv2D('conv0', image, 64, 7, stride=2)
l = MaxPooling('pool0', l, 3, 2, padding='SAME')
l = Conv2D('conv1', l, 64, 1)
......
......@@ -33,7 +33,7 @@ class Model(ModelDesc):
return [InputVar(tf.float32, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'),
InputVar(tf.int32, [None], 'label') ]
def _build_graph(self, input_vars, is_training):
def _build_graph(self, input_vars):
image, label = input_vars
image = image / 128.0 - 1 # ?
......@@ -69,7 +69,7 @@ class Model(ModelDesc):
.Conv2D('conv277ba', ch_r, [7,1])
.Conv2D('conv277bb', ch, [1,7])())
nl = BNReLU(is_training, decay=0.9997, epsilon=1e-3)
nl = BNReLU(decay=0.9997, epsilon=1e-3)
with argscope(Conv2D, nl=nl, use_bias=False):
l = (LinearWrap(image)
.Conv2D('conv0', 32, 3, stride=2, padding='VALID') #299
......@@ -168,7 +168,7 @@ class Model(ModelDesc):
l = GlobalAvgPooling('gap', l)
# 1x1x2048
l = tf.nn.dropout(l, keep_prob=0.8 if is_training else 1)
l = Dropout('drop', l, 0.8)
logits = FullyConnected('linear', l, out_dim=1000, nl=tf.identity)
loss1 = tf.nn.sparse_softmax_cross_entropy_with_logits(br1, label)
......
......@@ -39,7 +39,7 @@ class Model(ModelDesc):
InputVar(tf.int32, (None,), 'action'),
InputVar(tf.float32, (None,), 'futurereward') ]
def _get_NN_prediction(self, image, is_training):
def _get_NN_prediction(self, image):
""" image: [0,255]"""
image = image / 255.0
with argscope(Conv2D, nl=tf.nn.relu):
......@@ -56,9 +56,9 @@ class Model(ModelDesc):
policy = FullyConnected('fc-pi', l, out_dim=NUM_ACTIONS, nl=tf.identity)
return policy
def _build_graph(self, inputs, is_training):
def _build_graph(self, inputs):
state, action, futurereward = inputs
policy = self._get_NN_prediction(state, is_training)
policy = self._get_NN_prediction(state)
self.logits = tf.nn.softmax(policy, name='logits')
def play_one_episode(player, func, verbose=False):
......
......@@ -37,7 +37,7 @@ class Model(ModelDesc):
InputVar(tf.int32, [None], 'label')
]
def _build_graph(self, input_vars, is_training):
def _build_graph(self, input_vars):
image, label = input_vars
image = image / 128.0 - 1
......@@ -59,12 +59,12 @@ class Model(ModelDesc):
with tf.variable_scope(name) as scope:
if not first:
b1 = BatchNorm('bn1', l, is_training)
b1 = BatchNorm('bn1', l)
b1 = tf.nn.relu(b1)
else:
b1 = l
c1 = conv('conv1', b1, out_channel, stride1)
b2 = BatchNorm('bn2', c1, is_training)
b2 = BatchNorm('bn2', c1)
b2 = tf.nn.relu(b2)
c2 = conv('conv2', b2, out_channel, 1)
......@@ -76,7 +76,7 @@ class Model(ModelDesc):
return l
l = conv('conv0', image, 16, 1)
l = BatchNorm('bn0', l, is_training)
l = BatchNorm('bn0', l)
l = tf.nn.relu(l)
l = residual('res1.0', l, first=True)
for k in range(1, self.n):
......@@ -91,7 +91,7 @@ class Model(ModelDesc):
l = residual('res3.0', l, increase_dim=True)
for k in range(1, self.n):
l = residual('res3.' + str(k), l)
l = BatchNorm('bnlast', l, is_training)
l = BatchNorm('bnlast', l)
l = tf.nn.relu(l)
# 8,c=64
l = GlobalAvgPooling('gap', l)
......
......@@ -24,9 +24,8 @@ class Model(ModelDesc):
return [InputVar(tf.float32, [None, 40, 40, 3], 'input'),
InputVar(tf.int32, [None], 'label') ]
def _build_graph(self, input_vars, is_training):
def _build_graph(self, input_vars):
image, label = input_vars
keep_prob = tf.constant(0.5 if is_training else 1.0)
image = image / 128.0 - 1
......@@ -37,7 +36,7 @@ class Model(ModelDesc):
.Conv2D('conv3', 32, 3, padding='VALID')
.MaxPooling('pool2', 2, padding='SAME')
.Conv2D('conv4', 64, 3, padding='VALID')
.tf.nn.dropout(keep_prob)
.Dropout('drop', 0.5)
.FullyConnected('fc0', 512,
b_init=tf.constant_initializer(0.1))
.FullyConnected('linear', out_dim=10, nl=tf.identity)())
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment