Commit 3191ab74 authored by drallensmith

Merge branch 'randomization' into add_preprocess_action - bring in improvements, avoid rebase

parents ca27922d 9df16499
...@@ -53,7 +53,11 @@ def launch(cmd, name = 'Unknown', necessary = True, suppressOutput = True): ...@@ -53,7 +53,11 @@ def launch(cmd, name = 'Unknown', necessary = True, suppressOutput = True):
def main(args): def main(args):
"""Sets up the teams, launches the server and monitor, starts the trainer. """Sets up the teams, launches the server and monitor, starts the trainer.
""" """
if args.logging and not os.path.exists(args.logDir): if args.logging:
args.hfo_logging = True
if not os.path.exists(args.logDir):
os.makedirs(args.logDir)
elif args.hfo_logging and not os.path.exists(args.logDir):
os.makedirs(args.logDir) os.makedirs(args.logDir)
num_agents = args.offenseAgents + args.defenseAgents num_agents = args.offenseAgents + args.defenseAgents
binary_dir = os.path.dirname(os.path.realpath(__file__)) binary_dir = os.path.dirname(os.path.realpath(__file__))
...@@ -77,7 +81,7 @@ def main(args): ...@@ -77,7 +81,7 @@ def main(args):
'server::say_msg_size=%i ' \ 'server::say_msg_size=%i ' \
'server::record_messages=%i' \ 'server::record_messages=%i' \
%(server_port, coach_port, olcoach_port, %(server_port, coach_port, olcoach_port,
args.logging, args.logging, args.logging, args.logging, args.logging, args.hfo_logging,
args.logDir, args.logDir, args.logDir, args.logDir, args.logDir, args.logDir,
args.sync, args.fullstate, args.fullstate, args.sync, args.fullstate, args.fullstate,
args.maxFramesPerTrial, args.numTrials, args.numFrames, args.maxFramesPerTrial, args.numTrials, args.numFrames,
...@@ -166,6 +170,8 @@ def parseArgs(): ...@@ -166,6 +170,8 @@ def parseArgs():
'incrementally allocated the following ports.') 'incrementally allocated the following ports.')
p.add_argument('--no-logging', dest='logging', action='store_false', p.add_argument('--no-logging', dest='logging', action='store_false',
default=True, help='Disable rcssserver logging.') default=True, help='Disable rcssserver logging.')
p.add_argument('--hfo-logging', dest='hfo_logging', action='store_true', default=False,
help="Do .hfo logging even if no other logging")
p.add_argument('--log-dir', dest='logDir', default='log/', p.add_argument('--log-dir', dest='logDir', default='log/',
help='Directory to store logs. Default: log/') help='Directory to store logs. Default: log/')
p.add_argument('--record', dest='record', action='store_true', p.add_argument('--record', dest='record', action='store_true',
......
#!/bin/bash
# Start an HFO run in the background, passing 2 offense NPCs, 2 defense NPCs,
# 20 trials and headless mode on the command line.
# The path to the HFO launcher is relative, so run this from the directory
# that contains bin/.
./bin/HFO --offense-npcs=2 --defense-npcs=2 --trials 20 --headless &
# The magic line
# $$ holds the PID for this script; because the trap string is double-quoted,
# it is expanded once, when the trap is installed.
# Negation means kill by process group id instead of PID, so SIGINT (Ctrl-C)
# sends TERM to every process in that group rather than to this script alone.
# NOTE(review): assumes this script is the leader of its own process group
# (group id == $$) - confirm for the way it is launched.
trap "kill -TERM -$$" SIGINT
# Block until the background job has exited.
wait
#!/bin/bash
# Be sure to change/remove the seed for different experiments!
# Start a 5000-trial headless HFO run in the background with a fixed seed.
# --no-logging disables rcssserver logging; --hfo-logging still requests the
# .hfo log even though no other logging is done.
./bin/HFO --offense-npcs=2 --defense-npcs=2 --trials 5000 --headless --seed=1500348586 --no-logging --hfo-logging &
# The magic line
# $$ holds the PID for this script; because the trap string is double-quoted,
# it is expanded once, when the trap is installed.
# Negation means kill by process group id instead of PID, so SIGINT (Ctrl-C)
# sends TERM to every process in that group rather than to this script alone.
# NOTE(review): assumes this script is the leader of its own process group
# (group id == $$) - confirm for the way it is launched.
trap "kill -TERM -$$" SIGINT
# Block until the background job has exited.
wait
#!/bin/bash
# HAS TO BE RUN FROM EXAMPLE DIR DUE TO hand_coded_defense_agent CONFIG!
# Start a 20-trial headless HFO run on port 7000 in the background, with one
# externally-controlled defense agent slot in addition to the NPCs.
../bin/HFO --offense-npcs=2 --defense-agents=1 --defense-npcs=2 --trials 20 --headless --port=7000 &
# The below sleep period is needed to avoid the agent connecting in before the
# Trainer.py script gets the base/Helios goalie connected in; if that happens,
# the agent gets assigned unum 1 and there is a mixup in which agent is
# supposed to be the goalie (some portions of the various programs go by unum,
# others go by a goalie flag).
sleep 15
# Run the agent in the background; &> sends both stdout and stderr to
# agent1.txt.
./hand_coded_defense_agent &> agent1.txt &
# NOTE(review): presumably gives the agent time to connect before the script
# settles into waiting - the reason for this delay is not shown; confirm.
sleep 5
# The magic line
# $$ holds the PID for this script; because the trap string is double-quoted,
# it is expanded once, when the trap is installed.
# Negation means kill by process group id instead of PID, so SIGINT (Ctrl-C)
# sends TERM to every process in that group rather than to this script alone.
# Note that the trap is only installed after both sleeps have finished.
# NOTE(review): assumes this script is the leader of its own process group
# (group id == $$) - confirm for the way it is launched.
trap "kill -TERM -$$" SIGINT
# Block until both background jobs have exited.
wait
\ No newline at end of file
...@@ -2,7 +2,9 @@ ...@@ -2,7 +2,9 @@
# HAS TO BE RUN FROM EXAMPLE DIR DUE TO hand_coded_defense_agent CONFIG! # HAS TO BE RUN FROM EXAMPLE DIR DUE TO hand_coded_defense_agent CONFIG!
../bin/HFO --offense-npcs=2 --defense-agents=1 --defense-npcs=1 --trials 5000 --headless --port=7000 --seed 1500310928 --no-logging & # Change to a new seed for different experiments!
../bin/HFO --offense-npcs=2 --defense-agents=1 --defense-npcs=1 --trials 5000 --headless --port=7000 --seed 1500348586 --no-logging &
sleep 15 sleep 15
./hand_coded_defense_agent &> agent1.txt & ./hand_coded_defense_agent &> agent1.txt &
sleep 5 sleep 5
......
#!/bin/bash #!/bin/bash
./bin/HFO --offense-npcs=2 --defense-agents=1 --defense-npcs=1 --trials 5000 --headless --seed 1500310928 --no-logging & # Change to a different seed for different experiments!
./bin/HFO --offense-npcs=2 --defense-agents=1 --defense-npcs=1 --trials 5000 --headless --seed 1500348586 --no-logging &
sleep 15 sleep 15
./example/hand_coded_defense_agent.py &> agent1.txt & ./example/hand_coded_defense_agent.py &> agent1.txt &
sleep 5 sleep 5
......
...@@ -66,7 +66,7 @@ bool is_in_open_area(double pos_x, double pos_y) { ...@@ -66,7 +66,7 @@ bool is_in_open_area(double pos_x, double pos_y) {
} }
} }
action_with_params get_defense_action(const std::vector<float>& state_vec, double no_of_opponents, double numTMates) { action_with_params get_defense_action(const std::vector<float>& state_vec, double no_of_opponents, int numTMates) {
int size_of_vec = 10 + 6*numTMates + 3*no_of_opponents; int size_of_vec = 10 + 6*numTMates + 3*no_of_opponents;
if (size_of_vec != state_vec.size()) { if (size_of_vec != state_vec.size()) {
std :: cout <<"Invalid Feature Vector / Check the number of teammates/opponents provided"; std :: cout <<"Invalid Feature Vector / Check the number of teammates/opponents provided";
...@@ -221,7 +221,7 @@ int main(int argc, char** argv) { ...@@ -221,7 +221,7 @@ int main(int argc, char** argv) {
// Get the vector of state features for the current state // Get the vector of state features for the current state
const vector<float>& feature_vec = hfo.getState(); const vector<float>& feature_vec = hfo.getState();
if (random == 0) { if (random == 0) {
action_with_params a = get_defense_action(feature_vec, 2,1); action_with_params a = get_defense_action(feature_vec, 2, hfo.getNumTeammates());
// std::cout << a.action << a.param; // std::cout << a.action << a.param;
if (a.action == hfo :: MARK_PLAYER || a.action == hfo::TACKLE) { if (a.action == hfo :: MARK_PLAYER || a.action == hfo::TACKLE) {
hfo.act(a.action, a.param); hfo.act(a.action, a.param);
......
...@@ -7,8 +7,6 @@ keeper/goalie. ...@@ -7,8 +7,6 @@ keeper/goalie.
from __future__ import print_function from __future__ import print_function
# encoding: utf-8 # encoding: utf-8
#MODIFIED#
# First Start the server: $> bin/start.py # First Start the server: $> bin/start.py
import argparse import argparse
import itertools import itertools
...@@ -83,9 +81,17 @@ def get_sorted_opponents(state_vec, num_opponents, num_teammates, pos_x, pos_y): ...@@ -83,9 +81,17 @@ def get_sorted_opponents(state_vec, num_opponents, num_teammates, pos_x, pos_y):
def is_in_open_area(pos_x, ignored_pos_y): def is_in_open_area(pos_x, ignored_pos_y):
return pos_x >= params['OPEN_AREA_HIGH_LIMIT_X'] return pos_x >= params['OPEN_AREA_HIGH_LIMIT_X']
def do_defense_action(state_vec, hfo_env, episode, def add_num_times(action, main_dict, opt_dict=None):
main_dict[action] += 1
if opt_dict:
opt_dict[action] += 1
return action
def do_defense_action(state_vec, hfo_env,
num_opponents, num_teammates, num_opponents, num_teammates,
old_ball_pos_x, old_ball_pos_y): old_ball_pos_x, old_ball_pos_y,
num_times_overall, num_times_kickable,
misc_tracked):
"""Figures out and does the (hopefully) best defense action.""" """Figures out and does the (hopefully) best defense action."""
min_vec_size = 10 + (6*num_teammates) + (3*num_opponents) min_vec_size = 10 + (6*num_teammates) + (3*num_opponents)
if (len(state_vec) < min_vec_size): if (len(state_vec) < min_vec_size):
...@@ -110,19 +116,19 @@ def do_defense_action(state_vec, hfo_env, episode, ...@@ -110,19 +116,19 @@ def do_defense_action(state_vec, hfo_env, episode,
ball_sorted_list = get_sorted_opponents(state_vec, num_opponents, num_teammates, ball_sorted_list = get_sorted_opponents(state_vec, num_opponents, num_teammates,
pos_x=ball_pos_x, pos_y=ball_pos_y) pos_x=ball_pos_x, pos_y=ball_pos_y)
if not ball_sorted_list: # unknown opponent positions/unums if not ball_sorted_list: # unknown opponent positions/unums
print("No known opponent locations " + print("No known opponent locations (btg {0!r}; bng {1!r}; ".format(ball_toward_goal,
"(episode {0:d}; btg {1!r}; ".format(episode,ball_toward_goal) + ball_nearer_goal) +
"ball xy {0:n}, {1:n}; ball old xy {2:n}, {3:n})".format(ball_pos_x, "ball xy {0:n}, {1:n}; ball old xy {2:n}, {3:n})".format(ball_pos_x,
ball_pos_y, ball_pos_y,
old_ball_pos_x, old_ball_pos_x,
old_ball_pos_y)) old_ball_pos_y))
if ball_toward_goal and (not is_in_open_area(ball_pos_x, ball_pos_y)): if ball_toward_goal:
if ball_nearer_goal: if ball_nearer_goal:
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL) hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
else: else:
hfo_env.act(hfo.INTERCEPT) hfo_env.act(add_num_times(hfo.INTERCEPT,num_times_overall))
else: else:
hfo_env.act(hfo.MOVE) hfo_env.act(add_num_times(hfo.MOVE,num_times_overall))
return return
goal_sorted_list = get_sorted_opponents(state_vec, num_opponents, num_teammates, goal_sorted_list = get_sorted_opponents(state_vec, num_opponents, num_teammates,
...@@ -139,96 +145,96 @@ def do_defense_action(state_vec, hfo_env, episode, ...@@ -139,96 +145,96 @@ def do_defense_action(state_vec, hfo_env, episode,
ball_sorted_list[0][1], ball_sorted_list[0][1],
ball_sorted_list[0][2], ball_sorted_list[0][3]) ball_sorted_list[0][2], ball_sorted_list[0][3])
agent_to_ball_dist = get_dist_normalized(agent_pos_x, agent_pos_y,
ball_pos_x, ball_pos_y)
if state_vec[5] > 0: # kickable distance of player if state_vec[5] > 0: # kickable distance of player
misc_tracked['max_kickable_dist'] = max(agent_to_ball_dist,misc_tracked['max_kickable_dist'])
if is_tackleable_opp: if is_tackleable_opp:
hfo_env.act(hfo.MOVE) # will do tackle hfo_env.act(add_num_times(hfo.MOVE,num_times_overall,num_times_kickable)) # will do tackle
elif ball_nearer_goal: elif ball_nearer_goal:
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL) hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall,num_times_kickable))
elif ball_toward_goal: elif ball_toward_goal:
hfo_env.act(hfo.INTERCEPT) hfo_env.act(add_num_times(hfo.INTERCEPT,num_times_overall,num_times_kickable))
else: else:
hfo_env.act(hfo.GO_TO_BALL) hfo_env.act(add_num_times(hfo.GO_TO_BALL,num_times_overall,num_times_kickable))
return return
agent_to_ball_dist = get_dist_normalized(agent_pos_x, agent_pos_y,
ball_pos_x, ball_pos_y)
if goal_sorted_list[0][0] != ball_sorted_list[0][0]: if goal_sorted_list[0][0] != ball_sorted_list[0][0]:
if is_in_open_area(ball_sorted_list[0][2], if is_in_open_area(ball_sorted_list[0][2],
ball_sorted_list[0][3]) and is_in_open_area(goal_sorted_list[0][2], ball_sorted_list[0][3]) and is_in_open_area(goal_sorted_list[0][2],
goal_sorted_list[0][3]): goal_sorted_list[0][3]):
if ball_sorted_list[0][1] < params['LOW_KICK_DIST']: if ball_sorted_list[0][1] < params['LOW_KICK_DIST']:
hfo_env.act(hfo.MARK_PLAYER, goal_sorted_list[0][0]) hfo_env.act(add_num_times(hfo.MARK_PLAYER,num_times_overall),
goal_sorted_list[0][0])
elif agent_to_ball_dist < ball_sorted_list[0][1]: elif agent_to_ball_dist < ball_sorted_list[0][1]:
if ball_nearer_goal: if ball_nearer_goal:
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL) hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
elif ball_toward_goal: elif ball_toward_goal:
hfo_env.act(hfo.INTERCEPT) hfo_env.act(add_num_times(hfo.INTERCEPT,num_times_overall))
else: else:
hfo_env.act(hfo.GO_TO_BALL) hfo_env.act(add_num_times(hfo.GO_TO_BALL,num_times_overall))
else: else:
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL) hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
elif ball_sorted_list[0][1] >= params['KICK_DIST']: elif ball_sorted_list[0][1] >= params['KICK_DIST']:
if agent_to_ball_dist < ball_sorted_list[0][1]: if agent_to_ball_dist < ball_sorted_list[0][1]:
if ball_nearer_goal: if ball_nearer_goal:
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL) hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
elif ball_toward_goal: elif ball_toward_goal:
hfo_env.act(hfo.INTERCEPT) hfo_env.act(add_num_times(hfo.INTERCEPT,num_times_overall))
else: else:
hfo_env.act(hfo.GO_TO_BALL) hfo_env.act(add_num_times(hfo.GO_TO_BALL,num_times_overall))
else: else:
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL) hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
elif is_tackleable_opp and (not is_in_open_area(ball_sorted_list[0][2], elif is_tackleable_opp and (not is_in_open_area(ball_sorted_list[0][2],
ball_sorted_list[0][3])): ball_sorted_list[0][3])):
hfo_env.act(hfo.MOVE) hfo_env.act(add_num_times(hfo.MOVE,num_times_overall))
## elif is_in_open_area(ball_sorted_list[0][2],ball_sorted_list[0][3]):
## hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL) # why not MARK_PLAYER for the one that is not in the open area?
elif ball_sorted_list[0][1] < (1*params['LOW_KICK_DIST']): elif ball_sorted_list[0][1] < (1*params['LOW_KICK_DIST']):
hfo_env.act(hfo.MARK_PLAYER, goal_sorted_list[0][0]) hfo_env.act(add_num_times(hfo.MARK_PLAYER,num_times_overall),
goal_sorted_list[0][0])
else: else:
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL) hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
return return
if is_in_open_area(ball_sorted_list[0][2],ball_sorted_list[0][3]): if is_in_open_area(ball_sorted_list[0][2],ball_sorted_list[0][3]):
if ball_sorted_list[0][1] < params['KICK_DIST']: if ball_sorted_list[0][1] < params['KICK_DIST']:
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL) hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
elif agent_to_ball_dist < params['KICK_DIST']: elif agent_to_ball_dist < params['KICK_DIST']:
if ball_nearer_goal: if ball_nearer_goal:
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL) hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
elif ball_toward_goal: elif ball_toward_goal:
hfo_env.act(hfo.INTERCEPT) hfo_env.act(add_num_times(hfo.INTERCEPT,num_times_overall))
else: else:
hfo_env.act(hfo.GO_TO_BALL) hfo_env.act(add_num_times(hfo.GO_TO_BALL,num_times_overall))
elif is_tackleable_opp: elif is_tackleable_opp:
hfo_env.act(hfo.MOVE) hfo_env.act(add_num_times(hfo.MOVE,num_times_overall))
else: else:
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL) hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
else: else:
if ball_sorted_list[0][1] >= max(params['KICK_DIST'],agent_to_ball_dist): if ball_sorted_list[0][1] >= max(params['KICK_DIST'],agent_to_ball_dist):
if ball_nearer_goal: if ball_nearer_goal:
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL) hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
elif ball_toward_goal: elif ball_toward_goal:
hfo_env.act(hfo.INTERCEPT) hfo_env.act(add_num_times(hfo.INTERCEPT,num_times_overall))
else: else:
hfo_env.act(hfo.GO_TO_BALL) hfo_env.act(add_num_times(hfo.GO_TO_BALL,num_times_overall))
elif ball_sorted_list[0][1] >= params['KICK_DIST']: elif ball_sorted_list[0][1] >= params['KICK_DIST']:
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL) hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
elif is_tackleable_opp: elif is_tackleable_opp:
hfo_env.act(hfo.MOVE) hfo_env.act(add_num_times(hfo.MOVE,num_times_overall))
else: else:
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL) hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
return return
def do_random_defense_action(state, hfo_env): def do_random_defense_action(state, hfo_env):
if state[5] > 0: # kickable if state[5] > 0: # kickable
if random.random() < 0.5: hfo_env.act(hfo.MOVE)
hfo_env.act(hfo.INTERCEPT) else:
if random.random() < 0.25:
hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL)
else: else:
hfo_env.act(hfo.MOVE) hfo_env.act(hfo.MOVE)
else:
hfo_env.act(random.choose(hfo.MOVE,hfo.DEFEND_GOAL,
hfo.REDUCE_ANGLE_TO_GOAL,hfo.REDUCE_ANGLE_TO_GOAL,
hfo.GO_TO_BALL,hfo.INTERCEPT))
return return
def main(): def main():
...@@ -267,6 +273,12 @@ def main(): ...@@ -267,6 +273,12 @@ def main():
my_unum = hfo_env.getUnum() my_unum = hfo_env.getUnum()
assert ((my_unum > 1) and (my_unum <= 11)), "Bad unum {!r}".format(my_unum) assert ((my_unum > 1) and (my_unum <= 11)), "Bad unum {!r}".format(my_unum)
print("My unum is {0:d}".format(my_unum)) print("My unum is {0:d}".format(my_unum))
num_times_overall = {}
num_times_kickable = {}
for action in range(hfo.NUM_HFO_ACTIONS):
num_times_overall[action] = 0
num_times_kickable[action] = 0
misc_tracked = {'max_kickable_dist':0}
for episode in itertools.count(): for episode in itertools.count():
old_ball_pos_x = -1 old_ball_pos_x = -1
old_ball_pos_y = 0 old_ball_pos_y = 0
...@@ -283,9 +295,12 @@ def main(): ...@@ -283,9 +295,12 @@ def main():
if (args.epsilon > 0) and (random.random() < args.epsilon): if (args.epsilon > 0) and (random.random() < args.epsilon):
do_random_defense_action(state, hfo_env) do_random_defense_action(state, hfo_env)
else: else:
do_defense_action(state_vec=state, hfo_env=hfo_env, episode=episode, do_defense_action(state_vec=state, hfo_env=hfo_env,
num_opponents=numOpponents, num_teammates=numTeammates, num_opponents=numOpponents, num_teammates=numTeammates,
old_ball_pos_x=old_ball_pos_x, old_ball_pos_y=old_ball_pos_y) old_ball_pos_x=old_ball_pos_x, old_ball_pos_y=old_ball_pos_y,
num_times_overall=num_times_overall,
num_times_kickable=num_times_kickable,
misc_tracked=misc_tracked)
old_ball_pos_x=state[3] old_ball_pos_x=state[3]
old_ball_pos_y=state[4] old_ball_pos_y=state[4]
status=hfo_env.step() status=hfo_env.step()
...@@ -293,6 +308,15 @@ def main(): ...@@ -293,6 +308,15 @@ def main():
# Quit if the server goes down # Quit if the server goes down
if status == hfo.SERVER_DOWN: if status == hfo.SERVER_DOWN:
for action in range(hfo.NUM_HFO_ACTIONS):
if num_times_overall[action]:
print("Overall times {0!s}: {1:d}".format(hfo_env.actionToString(action),
num_times_overall[action]))
for action in range(hfo.NUM_HFO_ACTIONS):
if num_times_kickable[action]:
print("Kickable times {0!s}: {1:d}".format(hfo_env.actionToString(action),
num_times_kickable[action]))
print("Max kickable dist: {0:n}".format(misc_tracked['max_kickable_dist']))
hfo_env.act(hfo.QUIT) hfo_env.act(hfo.QUIT)
exit() exit()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment