Commit bde41788 authored by drallensmith

Further work on hand_coded_defense_agent.py from instrumentation

parent 127e88ae
...@@ -7,8 +7,6 @@ keeper/goalie. ...@@ -7,8 +7,6 @@ keeper/goalie.
from __future__ import print_function from __future__ import print_function
# encoding: utf-8 # encoding: utf-8
#MODIFIED#
# First Start the server: $> bin/start.py # First Start the server: $> bin/start.py
import argparse import argparse
import itertools import itertools
...@@ -83,15 +81,17 @@ def get_sorted_opponents(state_vec, num_opponents, num_teammates, pos_x, pos_y): ...@@ -83,15 +81,17 @@ def get_sorted_opponents(state_vec, num_opponents, num_teammates, pos_x, pos_y):
def is_in_open_area(pos_x, ignored_pos_y): def is_in_open_area(pos_x, ignored_pos_y):
return pos_x >= params['OPEN_AREA_HIGH_LIMIT_X'] return pos_x >= params['OPEN_AREA_HIGH_LIMIT_X']
num_times_intercept = 0 def add_num_times(action, main_dict, opt_dict=None):
num_times_move = 0 main_dict[action] += 1
num_times_RATG = 0 if opt_dict:
num_times_GTB = 0 opt_dict[action] += 1
num_times_MP = 0 return action
def do_defense_action(state_vec, hfo_env, episode, def do_defense_action(state_vec, hfo_env,
num_opponents, num_teammates, num_opponents, num_teammates,
old_ball_pos_x, old_ball_pos_y): old_ball_pos_x, old_ball_pos_y,
num_times_overall, num_times_kickable,
misc_tracked):
"""Figures out and does the (hopefully) best defense action.""" """Figures out and does the (hopefully) best defense action."""
min_vec_size = 10 + (6*num_teammates) + (3*num_opponents) min_vec_size = 10 + (6*num_teammates) + (3*num_opponents)
if (len(state_vec) < min_vec_size): if (len(state_vec) < min_vec_size):
...@@ -107,8 +107,6 @@ def do_defense_action(state_vec, hfo_env, episode, ...@@ -107,8 +107,6 @@ def do_defense_action(state_vec, hfo_env, episode,
hfo_env.act(hfo.MOVE) # will be Reorient in that version hfo_env.act(hfo.MOVE) # will be Reorient in that version
return return
global num_times_intercept, num_times_move, num_times_RATG, num_times_GTB, num_times_MP
ball_toward_goal = ball_moving_toward_goal(ball_pos_x, ball_pos_y, ball_toward_goal = ball_moving_toward_goal(ball_pos_x, ball_pos_y,
old_ball_pos_x, old_ball_pos_y) old_ball_pos_x, old_ball_pos_y)
...@@ -118,22 +116,19 @@ def do_defense_action(state_vec, hfo_env, episode, ...@@ -118,22 +116,19 @@ def do_defense_action(state_vec, hfo_env, episode,
ball_sorted_list = get_sorted_opponents(state_vec, num_opponents, num_teammates, ball_sorted_list = get_sorted_opponents(state_vec, num_opponents, num_teammates,
pos_x=ball_pos_x, pos_y=ball_pos_y) pos_x=ball_pos_x, pos_y=ball_pos_y)
if not ball_sorted_list: # unknown opponent positions/unums if not ball_sorted_list: # unknown opponent positions/unums
print("No known opponent locations " + print("No known opponent locations (btg {0!r}; bng {1!r}; ".format(ball_toward_goal,
"(episode {0:d}; btg {1!r}; ".format(episode,ball_toward_goal) + ball_nearer_goal) +
"ball xy {0:n}, {1:n}; ball old xy {2:n}, {3:n})".format(ball_pos_x, "ball xy {0:n}, {1:n}; ball old xy {2:n}, {3:n})".format(ball_pos_x,
ball_pos_y, ball_pos_y,
old_ball_pos_x, old_ball_pos_x,
old_ball_pos_y)) old_ball_pos_y))
if ball_toward_goal and (not is_in_open_area(ball_pos_x, ball_pos_y)): if ball_toward_goal:
if ball_nearer_goal: if ball_nearer_goal:
num_times_RATG += 1 hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL)
else: else:
num_times_intercept += 1 hfo_env.act(add_num_times(hfo.INTERCEPT,num_times_overall))
hfo_env.act(hfo.INTERCEPT)
else: else:
num_times_move += 1 hfo_env.act(add_num_times(hfo.MOVE,num_times_overall))
hfo_env.act(hfo.MOVE)
return return
goal_sorted_list = get_sorted_opponents(state_vec, num_opponents, num_teammates, goal_sorted_list = get_sorted_opponents(state_vec, num_opponents, num_teammates,
...@@ -150,117 +145,90 @@ def do_defense_action(state_vec, hfo_env, episode, ...@@ -150,117 +145,90 @@ def do_defense_action(state_vec, hfo_env, episode,
ball_sorted_list[0][1], ball_sorted_list[0][1],
ball_sorted_list[0][2], ball_sorted_list[0][3]) ball_sorted_list[0][2], ball_sorted_list[0][3])
agent_to_ball_dist = get_dist_normalized(agent_pos_x, agent_pos_y,
ball_pos_x, ball_pos_y)
if state_vec[5] > 0: # kickable distance of player if state_vec[5] > 0: # kickable distance of player
misc_tracked['max_kickable_dist'] = max(agent_to_ball_dist,misc_tracked['max_kickable_dist'])
if is_tackleable_opp: if is_tackleable_opp:
num_times_move += 1 hfo_env.act(add_num_times(hfo.MOVE,num_times_overall,num_times_kickable)) # will do tackle
hfo_env.act(hfo.MOVE) # will do tackle
elif ball_nearer_goal: elif ball_nearer_goal:
num_times_RATG += 1 hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall,num_times_kickable))
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL)
elif ball_toward_goal: elif ball_toward_goal:
num_times_intercept += 1 hfo_env.act(add_num_times(hfo.INTERCEPT,num_times_overall,num_times_kickable))
hfo_env.act(hfo.INTERCEPT)
else: else:
num_times_GTB += 1 hfo_env.act(add_num_times(hfo.GO_TO_BALL,num_times_overall,num_times_kickable))
hfo_env.act(hfo.GO_TO_BALL)
return return
agent_to_ball_dist = get_dist_normalized(agent_pos_x, agent_pos_y,
ball_pos_x, ball_pos_y)
if goal_sorted_list[0][0] != ball_sorted_list[0][0]: if goal_sorted_list[0][0] != ball_sorted_list[0][0]:
if is_in_open_area(ball_sorted_list[0][2], if is_in_open_area(ball_sorted_list[0][2],
ball_sorted_list[0][3]) and is_in_open_area(goal_sorted_list[0][2], ball_sorted_list[0][3]) and is_in_open_area(goal_sorted_list[0][2],
goal_sorted_list[0][3]): goal_sorted_list[0][3]):
if ball_sorted_list[0][1] < params['LOW_KICK_DIST']: if ball_sorted_list[0][1] < params['LOW_KICK_DIST']:
num_times_MP += 1 hfo_env.act(add_num_times(hfo.MARK_PLAYER,num_times_overall),
hfo_env.act(hfo.MARK_PLAYER, goal_sorted_list[0][0]) goal_sorted_list[0][0])
elif agent_to_ball_dist < ball_sorted_list[0][1]: elif agent_to_ball_dist < ball_sorted_list[0][1]:
if ball_nearer_goal: if ball_nearer_goal:
num_times_RATG += 1 hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL)
elif ball_toward_goal: elif ball_toward_goal:
num_times_intercept += 1 hfo_env.act(add_num_times(hfo.INTERCEPT,num_times_overall))
hfo_env.act(hfo.INTERCEPT)
else: else:
num_times_GTB += 1 hfo_env.act(add_num_times(hfo.GO_TO_BALL,num_times_overall))
hfo_env.act(hfo.GO_TO_BALL)
else: else:
num_times_RATG += 1 hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL)
elif ball_sorted_list[0][1] >= params['KICK_DIST']: elif ball_sorted_list[0][1] >= params['KICK_DIST']:
if agent_to_ball_dist < ball_sorted_list[0][1]: if agent_to_ball_dist < ball_sorted_list[0][1]:
if ball_nearer_goal: if ball_nearer_goal:
num_times_RATG += 1 hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL)
elif ball_toward_goal: elif ball_toward_goal:
num_times_intercept += 1 hfo_env.act(add_num_times(hfo.INTERCEPT,num_times_overall))
hfo_env.act(hfo.INTERCEPT)
else: else:
num_times_GTB += 1 hfo_env.act(add_num_times(hfo.GO_TO_BALL,num_times_overall))
hfo_env.act(hfo.GO_TO_BALL)
else: else:
num_times_RATG += 1 hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL)
elif is_tackleable_opp and (not is_in_open_area(ball_sorted_list[0][2], elif is_tackleable_opp and (not is_in_open_area(ball_sorted_list[0][2],
ball_sorted_list[0][3])): ball_sorted_list[0][3])):
num_times_move += 1 hfo_env.act(add_num_times(hfo.MOVE,num_times_overall))
hfo_env.act(hfo.MOVE)
elif ball_sorted_list[0][1] < (1*params['LOW_KICK_DIST']): elif ball_sorted_list[0][1] < (1*params['LOW_KICK_DIST']):
num_times_MP += 1 hfo_env.act(add_num_times(hfo.MARK_PLAYER,num_times_overall),
hfo_env.act(hfo.MARK_PLAYER, goal_sorted_list[0][0]) goal_sorted_list[0][0])
else: else:
num_times_RATG += 1 hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL)
return return
if is_in_open_area(ball_sorted_list[0][2],ball_sorted_list[0][3]): if is_in_open_area(ball_sorted_list[0][2],ball_sorted_list[0][3]):
if ball_sorted_list[0][1] < params['KICK_DIST']: if ball_sorted_list[0][1] < params['KICK_DIST']:
num_times_RATG += 1 hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL)
elif agent_to_ball_dist < params['KICK_DIST']: elif agent_to_ball_dist < params['KICK_DIST']:
if ball_nearer_goal: if ball_nearer_goal:
num_times_RATG += 1 hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL)
elif ball_toward_goal: elif ball_toward_goal:
num_times_intercept += 1 hfo_env.act(add_num_times(hfo.INTERCEPT,num_times_overall))
hfo_env.act(hfo.INTERCEPT)
else: else:
num_times_GTB += 1 hfo_env.act(add_num_times(hfo.GO_TO_BALL,num_times_overall))
hfo_env.act(hfo.GO_TO_BALL)
elif is_tackleable_opp: elif is_tackleable_opp:
num_times_move += 1 hfo_env.act(add_num_times(hfo.MOVE,num_times_overall))
hfo_env.act(hfo.MOVE)
else: else:
num_times_RATG += 1 hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL)
else: else:
if ball_sorted_list[0][1] >= max(params['KICK_DIST'],agent_to_ball_dist): if ball_sorted_list[0][1] >= max(params['KICK_DIST'],agent_to_ball_dist):
if ball_nearer_goal: if ball_nearer_goal:
num_times_RATG += 1 hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL)
elif ball_toward_goal: elif ball_toward_goal:
num_times_intercept += 1 hfo_env.act(add_num_times(hfo.INTERCEPT,num_times_overall))
hfo_env.act(hfo.INTERCEPT)
else: else:
num_times_GTB += 1 hfo_env.act(add_num_times(hfo.GO_TO_BALL,num_times_overall))
hfo_env.act(hfo.GO_TO_BALL)
elif ball_sorted_list[0][1] >= params['KICK_DIST']: elif ball_sorted_list[0][1] >= params['KICK_DIST']:
num_times_RATG += 1 hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL)
elif is_tackleable_opp: elif is_tackleable_opp:
num_times_move += 1 hfo_env.act(add_num_times(hfo.MOVE,num_times_overall))
hfo_env.act(hfo.MOVE)
else: else:
num_times_RATG += 1 hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL)
return return
def do_random_defense_action(state, hfo_env): def do_random_defense_action(state, hfo_env):
if state[5] > 0: # kickable if state[5] > 0: # kickable
hfo_env.act(random.choice([hfo.REDUCE_ANGLE_TO_GOAL, hfo_env.act(hfo.MOVE)
hfo.MOVE,
HFO.GO_TO_BALL]))
else: else:
if random.random() < 0.25: if random.random() < 0.25:
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL) hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL)
...@@ -305,7 +273,12 @@ def main(): ...@@ -305,7 +273,12 @@ def main():
my_unum = hfo_env.getUnum() my_unum = hfo_env.getUnum()
assert ((my_unum > 1) and (my_unum <= 11)), "Bad unum {!r}".format(my_unum) assert ((my_unum > 1) and (my_unum <= 11)), "Bad unum {!r}".format(my_unum)
print("My unum is {0:d}".format(my_unum)) print("My unum is {0:d}".format(my_unum))
global num_times_intercept, num_times_move, num_times_RATG, num_times_GTB, num_times_MP num_times_overall = {}
num_times_kickable = {}
for action in range(hfo.NUM_HFO_ACTIONS):
num_times_overall[action] = 0
num_times_kickable[action] = 0
misc_tracked = {'max_kickable_dist':0}
for episode in itertools.count(): for episode in itertools.count():
old_ball_pos_x = -1 old_ball_pos_x = -1
old_ball_pos_y = 0 old_ball_pos_y = 0
...@@ -322,9 +295,12 @@ def main(): ...@@ -322,9 +295,12 @@ def main():
if (args.epsilon > 0) and (random.random() < args.epsilon): if (args.epsilon > 0) and (random.random() < args.epsilon):
do_random_defense_action(state, hfo_env) do_random_defense_action(state, hfo_env)
else: else:
do_defense_action(state_vec=state, hfo_env=hfo_env, episode=episode, do_defense_action(state_vec=state, hfo_env=hfo_env,
num_opponents=numOpponents, num_teammates=numTeammates, num_opponents=numOpponents, num_teammates=numTeammates,
old_ball_pos_x=old_ball_pos_x, old_ball_pos_y=old_ball_pos_y) old_ball_pos_x=old_ball_pos_x, old_ball_pos_y=old_ball_pos_y,
num_times_overall=num_times_overall,
num_times_kickable=num_times_kickable,
misc_tracked=misc_tracked)
old_ball_pos_x=state[3] old_ball_pos_x=state[3]
old_ball_pos_y=state[4] old_ball_pos_y=state[4]
status=hfo_env.step() status=hfo_env.step()
...@@ -332,11 +308,15 @@ def main(): ...@@ -332,11 +308,15 @@ def main():
# Quit if the server goes down # Quit if the server goes down
if status == hfo.SERVER_DOWN: if status == hfo.SERVER_DOWN:
print("Intercept: {0:d}; Move: {1:d}".format(num_times_intercept, for action in range(hfo.NUM_HFO_ACTIONS):
num_times_move)) if num_times_overall[action]:
print("RATG: {0:d}; GTB: {1:d}; MP: {2:d}".format(num_times_RATG, print("Overall times {0!s}: {1:d}".format(hfo_env.actionToString(action),
num_times_GTB, num_times_overall[action]))
num_times_MP)) for action in range(hfo.NUM_HFO_ACTIONS):
if num_times_kickable[action]:
print("Kickable times {0!s}: {1:d}".format(hfo_env.actionToString(action),
num_times_kickable[action]))
print("Max kickable dist: {0:n}".format(misc_tracked['max_kickable_dist']))
hfo_env.act(hfo.QUIT) hfo_env.act(hfo.QUIT)
exit() exit()
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment