Commit 208de18c authored by Yuxin Wu

use stat in DQN instead of reward

parent 5fd47e6d
...@@ -136,7 +136,6 @@ def current_predictor(state): ...@@ -136,7 +136,6 @@ def current_predictor(state):
return pred[0] return pred[0]
def play_one_episode(player, func, verbose=False): def play_one_episode(player, func, verbose=False):
tot_reward = 0
while True: while True:
s = player.current_state() s = player.current_state()
outputs = func([[s]]) outputs = func([[s]])
...@@ -149,9 +148,10 @@ def play_one_episode(player, func, verbose=False): ...@@ -149,9 +148,10 @@ def play_one_episode(player, func, verbose=False):
if verbose: if verbose:
print(act) print(act)
reward, isOver = player.action(act) reward, isOver = player.action(act)
tot_reward += reward
if isOver: if isOver:
return tot_reward sc = player.stats['score'][0]
player.reset_stat()
return sc
def play_model(model_path): def play_model(model_path):
player = PreventStuckPlayer(HistoryFramePlayer(get_player(0.01), FRAME_HISTORY), 30, 1) player = PreventStuckPlayer(HistoryFramePlayer(get_player(0.01), FRAME_HISTORY), 30, 1)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment