Merge branch 'randomization' into add_preprocess_action - bring in updates/fixes, avoid rebase

34a71542 · drallensmith · 260c2416 · 1c57d3f1 · 34a71542 · 34a71542
Commit 34a71542 authored Jul 16, 2017 by drallensmith
8 changed files
--- a/.gitignore
+++ b/.gitignore
@@ -73,6 +73,7 @@ example/mid_level_move_agent
 example/mid_level_kick_agent
 example/mid_level_dribble_agent
 example/communication_agent
+example/hand_coded_defense_agent
 # Dependency directories
 librcsc-prefix/

--- a/example/defense_2v2.sh
+++ b/example/defense_2v2.sh
+#!/bin/bash
+# HAS TO BE RUN FROM EXAMPLE DIR DUE TO hand_coded_defense_agent CONFIG!
+# Starts a headless 2v2 HFO server on port 7000 (2 offense npcs vs. 1 defense
+# agent + 1 defense npc, 20 trials) and then the compiled hand-coded defense
+# agent, whose output is written to agent1.txt.
+#
+# The magic line - installed BEFORE anything is launched, so that Ctrl-C during
+# the start-up sleeps also takes down the background processes
+#   $$ holds the PID for this script
+#   Negation means kill by process group id instead of PID
+trap "kill -TERM -$$" SIGINT
+../bin/HFO --offense-npcs=2 --defense-agents=1 --defense-npcs=1 --trials 20 --headless --port=7000 &
+sleep 5
+./hand_coded_defense_agent &> agent1.txt &
+sleep 5
+wait
\ No newline at end of file
--- a/example/defense_python_2v2.sh
+++ b/example/defense_python_2v2.sh
+#!/bin/bash
+# Starts a headless 2v2 HFO server (2 offense npcs vs. 1 defense agent +
+# 1 defense npc, 20 trials) and then the Python hand-coded defense agent,
+# whose output is written to agent1.txt. Paths are relative to the HFO
+# top-level directory.
+#
+# The magic line - installed BEFORE anything is launched, so that Ctrl-C during
+# the start-up sleeps also takes down the background processes
+#   $$ holds the PID for this script
+#   Negation means kill by process group id instead of PID
+trap "kill -TERM -$$" SIGINT
+./bin/HFO --offense-npcs=2 --defense-agents=1 --defense-npcs=1 --trials 20 --headless &
+sleep 5
+./example/hand_coded_defense_agent.py &> agent1.txt &
+sleep 5
+wait
\ No newline at end of file
--- a/example/defense_python_3v3.sh
+++ b/example/defense_python_3v3.sh
+#!/bin/bash
+# Starts a headless 3v3 HFO server (3 offense npcs vs. 1 defense agent +
+# 2 defense npcs, 20 trials) and then the Python hand-coded defense agent,
+# whose output is written to agent1.txt. Paths are relative to the HFO
+# top-level directory.
+#
+# The magic line - installed BEFORE anything is launched, so that Ctrl-C during
+# the start-up sleeps also takes down the background processes
+#   $$ holds the PID for this script
+#   Negation means kill by process group id instead of PID
+trap "kill -TERM -$$" SIGINT
+./bin/HFO --offense-npcs=3 --defense-agents=1 --defense-npcs=2 --trials 20 --headless &
+sleep 5
+./example/hand_coded_defense_agent.py &> agent1.txt &
+sleep 5
+wait
\ No newline at end of file
--- a/example/hand_coded_defense_agent
+++ b/example/hand_coded_defense_agent
--- a/example/hand_coded_defense_agent.cpp
+++ b/example/hand_coded_defense_agent.cpp
@@ -208,7 +208,8 @@ int main(int argc, char** argv) {
  HFOEnvironment hfo;
  int random = 0;
  double numGoals = 0;
-  double numEpisodes = 5000;
+  int numEpisodes = 5000;
+  double actualNumEpisodes = 0;
  // Connect to the server and request high-level feature set. See
  // manual for more information on feature sets.
  hfo.connectToServer(features, config_dir, port, server_addr,
@@ -233,10 +234,10 @@ int main(int argc, char** argv) {
              string s = hfo::ActionToString(a.action) + " " +to_string(a.param) + "\n";
             // std::cout << s;
      } else {
-                  std::cout <<"Randm";
+	std::cout <<"Random";
 	action_t a = get_random_high_lv_action();
 	if (a == hfo :: MARK_PLAYER) {
-                  hfo.act(NOOP);
+	  hfo.act(NOOP); // why not MOVE?
 	} else {
 	  hfo.act(a);
 	}
@@ -249,8 +250,14 @@ int main(int argc, char** argv) {
    // Check what the outcome of the episode was
    cout << "Episode " << episode << " ended with status: "
         << StatusToString(status) << std::endl;
+    if (status==SERVER_DOWN) {
+      break;
+    } else {
+      actualNumEpisodes++;
+    }
  }
-  double cost = numGoals/numEpisodes;
+  double cost = numGoals/actualNumEpisodes;
  hfo.act(QUIT);
  //write_cost(cost);
 };

--- a/example/hand_coded_defense_agent.py
+++ b/example/hand_coded_defense_agent.py
+#!/usr/bin/env python
+"""
+This is a hand-coded defense agent, using hand_coded_defense_agent.cpp as a starting point,
+that should be able to play, for instance, a 2v2 game against 2 offense npcs. It requires a goal
+keeper/goalie.
+"""
+from __future__ import print_function
+# encoding: utf-8
+#MODIFIED#
+# First Start the server: $> bin/start.py
+import argparse
+import itertools
+import math
+import random
+try:
+  import hfo
+except ImportError:
+  print('Failed to import hfo. To install hfo, in the HFO directory'\
+    ' run: \"pip install .\"')
+  exit()
+# Goal position used for all distance-to-goal computations below.
+GOAL_POS_X = 1.0
+GOAL_POS_Y = 0.0
+# below - from hand_coded_defense_agent.cpp except LOW_KICK_DIST
+# NOTE(review): HALF_FIELD_WIDTH is 68 although the range described next to it
+# (-34 to 34) spans 68 in total - confirm the name/value against the .cpp file.
+HALF_FIELD_WIDTH = 68 # y coordinate -34 to 34 (-34 = bottom 34 = top)
+HALF_FIELD_LENGTH = 52.5 # x coordinate 0 to 52.5 (0 = goalline 52.5 = center)
+# Distance thresholds compared against get_dist_normalized() results, plus the
+# x limit used by is_in_open_area(). LOW_KICK_DIST is 5 divided by
+# HALF_FIELD_LENGTH.
+params = {'KICK_DIST':1.504052352, 'OPEN_AREA_HIGH_LIMIT_X':0.747311440447,
+          'TACKLE_DIST':1.613456553, 'LOW_KICK_DIST':(5/HALF_FIELD_LENGTH)}
+def get_dist_normalized(ref_x, ref_y, src_x, src_y):
+  """
+  Returns the distance between (ref_x, ref_y) and (src_x, src_y), with the
+  y difference first scaled by HALF_FIELD_WIDTH/HALF_FIELD_LENGTH.
+  """
+  delta_x = ref_x - src_x
+  scaled_delta_y = (HALF_FIELD_WIDTH/HALF_FIELD_LENGTH)*(ref_y - src_y)
+  return math.sqrt(math.pow(delta_x,2) + math.pow(scaled_delta_y,2))
+##def is_kickable(ball_pos_x, ball_pos_y, kicker_pos_x, kicker_pos_y):
+##  return get_dist_normalized(ball_pos_x, ball_pos_y,
+##                             kicker_pos_x, kicker_pos_y) < params['KICK_DIST']
+def is_tackleable(agent_pos_x, agent_pos_y, opp_pos_x, opp_pos_y):
+  """True when the opponent is closer to the agent than params['TACKLE_DIST']."""
+  dist_to_opp = get_dist_normalized(agent_pos_x, agent_pos_y,
+                                    opp_pos_x, opp_pos_y)
+  return dist_to_opp < params['TACKLE_DIST']
+def ball_moving_toward_goal(ball_pos_x, ball_pos_y, old_ball_pos_x, old_ball_pos_y):
+  """True when the ball is now strictly closer to the goal than its old position was."""
+  new_goal_dist = get_dist_normalized(ball_pos_x, ball_pos_y,
+                                      GOAL_POS_X, GOAL_POS_Y)
+  old_goal_dist = get_dist_normalized(old_ball_pos_x, old_ball_pos_y,
+                                      GOAL_POS_X, GOAL_POS_Y)
+  return new_goal_dist < old_goal_dist
+def get_sorted_opponents(state_vec, num_opponents, num_teammates, pos_x, pos_y):
+  """
+  Collects a tuple(unum, dist, opp_pos_x, opp_pos_y) for every opponent whose
+  uniform number in state_vec is positive, and returns them as a list ordered
+  by ascending dist from (pos_x, pos_y). Opponents without a positive unum
+  are treated as unknown and left out.
+  """
+  opp_offset = 9 + (6*num_teammates) # added to every opponent feature index
+  known_opponents = []
+  for opp_num in range(num_opponents):
+    base = opp_offset + (opp_num*3)
+    unum = state_vec[base+3]
+    if not (unum > 0):
+      continue # unknown
+    opp_pos_x = state_vec[base+1]
+    opp_pos_y = state_vec[base+2]
+    known_opponents.append((unum,
+                            get_dist_normalized(pos_x, pos_y, opp_pos_x, opp_pos_y),
+                            opp_pos_x, opp_pos_y))
+  known_opponents.sort(key=lambda entry: entry[1])
+  return known_opponents
+def is_in_open_area(pos_x, ignored_pos_y):
+  """True when pos_x is at or beyond params['OPEN_AREA_HIGH_LIMIT_X']; the y value is unused."""
+  x_limit = params['OPEN_AREA_HIGH_LIMIT_X']
+  return pos_x >= x_limit
+def do_defense_action(state_vec, hfo_env, episode,
+                      num_opponents, num_teammates,
+                      old_ball_pos_x, old_ball_pos_y):
+  """
+  Figures out and does the (hopefully) best defense action.
+
+  Every path through the function ends in a single hfo_env.act() call; nothing
+  is returned.
+
+  Arguments:
+    state_vec: feature vector. Indices 0/1 are read as the agent x/y, 3/4 as
+      the ball x/y and 5 as the kickable indicator; the opponent entries are
+      read through get_sorted_opponents().
+    hfo_env: environment object whose act() method is called.
+    episode: episode number, only used in the diagnostic message.
+    num_opponents, num_teammates: used for the length check and for locating
+      the opponent entries in state_vec.
+    old_ball_pos_x, old_ball_pos_y: ball position from the previous step,
+      compared with the current one by ball_moving_toward_goal().
+
+  Raises:
+    LookupError: if state_vec has fewer than
+      10 + 6*num_teammates + 3*num_opponents entries.
+  """
+  min_vec_size = 10 + (6*num_teammates) + (3*num_opponents)
+  if (len(state_vec) < min_vec_size):
+    raise LookupError("Feature vector length is {0:d} not {1:d}".format(len(state_vec),
+                                                                        min_vec_size))
+  agent_pos_x = state_vec[0]
+  agent_pos_y = state_vec[1]
+  ball_pos_x = state_vec[3]
+  ball_pos_y = state_vec[4]
+  # if get high_level working for invalid
+  # (any of the four positions below -2 is handled as an invalid reading -
+  # presumably a sentinel value from the feature set; TODO confirm)
+  if (min(agent_pos_x,agent_pos_y,ball_pos_x,ball_pos_y) < -2):
+    hfo_env.act(hfo.MOVE) # will be Reorient in that version
+    return
+  ball_toward_goal = ball_moving_toward_goal(ball_pos_x, ball_pos_y,
+                                             old_ball_pos_x, old_ball_pos_y)
+  # Known opponents ordered by distance from the ball; entry [0] is the one
+  # nearest the ball, as (unum, dist, x, y).
+  ball_sorted_list = get_sorted_opponents(state_vec, num_opponents, num_teammates,
+                                          pos_x=ball_pos_x, pos_y=ball_pos_y)
+  if not ball_sorted_list: # unknown opponent positions/unums
+    print("No known opponent locations " +
+          "(episode {0:d}; btg {1!r}; ".format(episode,ball_toward_goal) +
+          "ball xy {0:n}, {1:n}; ball old xy {2:n}, {3:n})".format(ball_pos_x,
+                                                                   ball_pos_y,
+                                                                   old_ball_pos_x,
+                                                                   old_ball_pos_y))
+    if ball_toward_goal and (not is_in_open_area(ball_pos_x, ball_pos_y)):
+      hfo_env.act(hfo.INTERCEPT)
+    else:
+      hfo_env.act(hfo.MOVE)
+    return
+  # Whether the opponent nearest the ball is within TACKLE_DIST of the agent.
+  is_tackleable_opp = is_tackleable(agent_pos_x, agent_pos_y,
+                                    ball_sorted_list[0][2], ball_sorted_list[0][3])
+  # Case 1: the kickable feature is set for the agent.
+  if state_vec[5] > 0: # kickable distance of player
+    if ball_sorted_list[0][1] < params['LOW_KICK_DIST']:
+      hfo_env.act(hfo.MOVE) # will do tackle
+    elif ball_toward_goal:
+      hfo_env.act(hfo.INTERCEPT)
+    elif is_tackleable_opp:
+      if ball_sorted_list[0][1] < get_dist_normalized(agent_pos_x, agent_pos_y,
+                                                      ball_pos_x, ball_pos_y):
+        hfo_env.act(hfo.MOVE) # will do tackle
+      else:
+        hfo_env.act(hfo.INTERCEPT)
+    else:
+      hfo_env.act(hfo.GO_TO_BALL)
+    return
+  # Case 2: the opponent nearest the ball is within KICK_DIST of it.
+  if ball_sorted_list[0][1] < params['KICK_DIST']:
+    # Same opponents, this time ordered by distance from the goal.
+    goal_sorted_list = get_sorted_opponents(state_vec, num_opponents, num_teammates,
+                                            pos_x=GOAL_POS_X, pos_y=GOAL_POS_Y)
+    if goal_sorted_list[0][0] != ball_sorted_list[0][0]: # top in each are opponents to worry about
+      if is_in_open_area(ball_sorted_list[0][2],ball_sorted_list[0][3]):
+        hfo_env.act(hfo.MARK_PLAYER, goal_sorted_list[0][0])
+      elif get_dist_normalized(agent_pos_x, agent_pos_y,
+                               ball_pos_x, ball_pos_y) < ball_sorted_list[0][1]:
+##        # odd; why not kickable above?
+##        print("Ball dist below {0:n}".format(ball_sorted_list[0][1]) +
+##              " but not kickable (btg {0!r} ito {1!r})".format(ball_toward_goal,
+##                                                                is_tackleable_opp))
+        if ball_toward_goal:
+          hfo_env.act(hfo.INTERCEPT)
+        elif is_in_open_area(ball_pos_x, ball_pos_y) or (is_tackleable_opp and
+                                                         (ball_sorted_list[0][1] <
+                                                          params['LOW_KICK_DIST'])):
+          hfo_env.act(hfo.MOVE) # will do tackle or appropriate
+        else:
+          hfo_env.act(hfo.GO_TO_BALL)
+      elif is_tackleable_opp and (ball_sorted_list[0][1] < params['LOW_KICK_DIST']):
+        hfo_env.act(hfo.MOVE) # will do tackle
+      else:
+        hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL)
+    elif is_tackleable_opp and (ball_sorted_list[0][1] < params['LOW_KICK_DIST']):
+      hfo_env.act(hfo.MOVE) # will do tackle
+    else:
+      hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL)
+    return
+  # Case 3: no opponent within KICK_DIST of the ball.
+  if (not is_in_open_area(ball_pos_x, ball_pos_y)) and ball_toward_goal:
+    hfo_env.act(hfo.INTERCEPT)
+    return
+  # The agent is closer to the ball than the nearest known opponent is.
+  if get_dist_normalized(agent_pos_x, agent_pos_y, ball_pos_x, ball_pos_y) < ball_sorted_list[0][1]:
+    if ball_toward_goal:
+      hfo_env.act(hfo.INTERCEPT)
+    elif is_in_open_area(ball_pos_x, ball_pos_y):
+      hfo_env.act(hfo.MOVE)
+    else:
+      hfo_env.act(hfo.GO_TO_BALL)
+    return
+  # Otherwise decide based on the position of the opponent nearest the goal.
+  goal_sorted_list = get_sorted_opponents(state_vec, num_opponents, num_teammates,
+                                          pos_x=GOAL_POS_X, pos_y=GOAL_POS_Y)
+  if is_in_open_area(goal_sorted_list[0][2], goal_sorted_list[0][3]):
+    hfo_env.act(hfo.MOVE)
+  else:
+    hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL)
+  return
+def do_random_defense_action(state, hfo_env):
+  """
+  Performs one randomly chosen defense action through hfo_env.act().
+
+  If state[5] is positive (kickable), INTERCEPT and MOVE are equally likely.
+  Otherwise the action is drawn from a list in which MOVE and
+  REDUCE_ANGLE_TO_GOAL each appear twice and GO_TO_BALL and INTERCEPT once.
+  """
+  if state[5] > 0: # kickable
+    if random.random() < 0.5:
+      hfo_env.act(hfo.INTERCEPT)
+    else:
+      hfo_env.act(hfo.MOVE)
+  else:
+    # random.choice() takes a single sequence (the random module has no
+    # choose()); the repeated entries weight the selection.
+    hfo_env.act(random.choice([hfo.MOVE,hfo.MOVE,
+                               hfo.REDUCE_ANGLE_TO_GOAL,hfo.REDUCE_ANGLE_TO_GOAL,
+                               hfo.GO_TO_BALL,hfo.INTERCEPT]))
+  return
+def main():
+  """
+  Parses the command-line options, connects to the HFO server on localhost as
+  a non-goalie 'base_right' player using the high-level feature set, then
+  plays episodes until the server reports SERVER_DOWN.
+  """
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--port', type=int, default=6000, help="Server port")
+  parser.add_argument('--seed', type=int, default=None,
+                      help="Python randomization seed; uses python default if 0 or not given")
+  parser.add_argument('--epsilon', type=float, default=0,
+                      help="Probability of a random action, to adjust difficulty")
+  parser.add_argument('--record', action='store_true',
+                      help="If doing HFO --record")
+  parser.add_argument('--rdir', type=str, default='log/',
+                      help="Set directory to use if doing --record")
+  args=parser.parse_args()
+  # A seed of 0 or None is falsy, so Python's default seeding is kept then.
+  if args.seed:
+    random.seed(args.seed)
+  hfo_env = hfo.HFOEnvironment()
+  # The two calls differ only in passing record_dir when --record is given.
+  if args.record:
+    hfo_env.connectToServer(hfo.HIGH_LEVEL_FEATURE_SET,
+                            'bin/teams/base/config/formations-dt', args.port,
+                            'localhost', 'base_right', play_goalie=False,
+                            record_dir=args.rdir)
+  else:
+    hfo_env.connectToServer(hfo.HIGH_LEVEL_FEATURE_SET,
+                            'bin/teams/base/config/formations-dt', args.port,
+                            'localhost', 'base_right', play_goalie=False)
+  numTeammates = hfo_env.getNumTeammates()
+  numOpponents = hfo_env.getNumOpponents()
+  # Random numbers are only drawn when --epsilon > 0, so a seed on its own
+  # has no effect.
+  if args.seed:
+    if args.epsilon > 0:
+      print("Python randomization seed: {0:d}".format(args.seed))
+    else:
+      print("Python randomization seed useless without --epsilon >0")
+  if args.epsilon > 0:
+    print("Using epsilon {0:n}".format(args.epsilon))
+  # Loops over episodes without an upper bound; the only way out is the
+  # SERVER_DOWN check below.
+  for episode in itertools.count():
+    # No previous ball position exists at the start of an episode; (0, 0) is
+    # used as the initial value.
+    old_ball_pos_x = 0
+    old_ball_pos_y = 0
+    status = hfo.IN_GAME
+    while status == hfo.IN_GAME:
+      state = hfo_env.getState()
+      # With probability epsilon take a random action instead of the
+      # hand-coded one.
+      if (args.epsilon > 0) and (random.random() < args.epsilon):
+        do_random_defense_action(state, hfo_env)
+      else:
+        do_defense_action(state_vec=state, hfo_env=hfo_env, episode=episode,
+                          num_opponents=numOpponents, num_teammates=numTeammates,
+                          old_ball_pos_x=old_ball_pos_x, old_ball_pos_y=old_ball_pos_y)
+      # Remember this step's ball position for the next step's comparison.
+      old_ball_pos_x=state[3]
+      old_ball_pos_y=state[4]
+      status=hfo_env.step()
+      #print(status)
+    # Quit if the server goes down
+    if status == hfo.SERVER_DOWN:
+      hfo_env.act(hfo.QUIT)
+      exit()
+    # Check the outcome of the episode
+    print("Episode {0:d} ended with {1:s}".format(episode,
+                                                  hfo_env.statusToString(status)))
+# Run main() only when this file is executed as a script, not when imported.
+if __name__ == '__main__':
+  main()
--- a/src/agent.cpp
+++ b/src/agent.cpp
@@ -923,10 +923,12 @@ bool Agent::doMarkPlayer(int unum) {
  int count = 0;
  for ( PlayerPtrCont::const_iterator it = wm.opponentsFromSelf().begin(); it != o_end; ++it ) {
      if ( (*it)->distFromBall() < 5 ) {
+	if ((kicker_unum == -1) || (kicker_unum != unum)) { // try to obey action instruction
          kicker_pos = (*it)->pos();
          kicker_unum = (*it)->unum();
 	}
      }
+  }
  for ( PlayerPtrCont::const_iterator it = wm.opponentsFromSelf().begin(); it !=  o_end; ++it ) {
 	  if ( (*it)-> unum() == unum ) {