Merge branch 'master' into action_chart - avoid rebase, bring in fixes

487d4ff0 · drallensmith · 7f4a62f1 · 981226d2 · 487d4ff0 · 487d4ff0
Commit 487d4ff0 authored Jul 20, 2017 by drallensmith
25 changed files
--- a/.travis.yml
+++ b/.travis.yml
 sudo: required
 language: cpp
 dist: trusty
+branches:
+  except:
+    - action_chart
 addons:
  apt:
    sources:

--- a/bin/HFO
+++ b/bin/HFO
@@ -53,7 +53,11 @@ def launch(cmd, name = 'Unknown', necessary = True, suppressOutput = True):
 def main(args):
  """Sets up the teams, launches the server and monitor, starts the trainer.
  """
-  if args.logging and not os.path.exists(args.logDir):
+  if args.logging:
+    args.hfo_logging = True
+    if not os.path.exists(args.logDir):
+      os.makedirs(args.logDir)
+  elif args.hfo_logging and not os.path.exists(args.logDir):
    os.makedirs(args.logDir)
  num_agents   = args.offenseAgents + args.defenseAgents
  binary_dir   = os.path.dirname(os.path.realpath(__file__))
@@ -77,7 +81,7 @@ def main(args):
                  'server::say_msg_size=%i ' \
                  'server::record_messages=%i' \
                  %(server_port, coach_port, olcoach_port,
-                    args.logging, args.logging, args.logging,
+                    args.logging, args.logging, args.hfo_logging,
                    args.logDir, args.logDir, args.logDir,
                    args.sync, args.fullstate, args.fullstate,
                    args.maxFramesPerTrial, args.numTrials, args.numFrames,
@@ -166,6 +170,8 @@ def parseArgs():
                 'incrementally allocated the following ports.')
  p.add_argument('--no-logging', dest='logging', action='store_false',
                 default=True, help='Disable rcssserver logging.')
+  p.add_argument('--hfo-logging', dest='hfo_logging', action='store_true', default=False,
+                 help="Do .hfo logging even if no other logging")
  p.add_argument('--log-dir', dest='logDir', default='log/',
                 help='Directory to store logs. Default: log/')
  p.add_argument('--record', dest='record', action='store_true',

--- a/example/base_2v2.sh
+++ b/example/base_2v2.sh
+#!/bin/bash
+./bin/HFO --offense-npcs=2 --defense-npcs=2 --trials 20 --headless &
+# The magic line
+#   $$ holds the PID for this script
+#   Negation means kill by process group id instead of PID
+trap "kill -TERM -$$" SIGINT
+wait
--- a/example/base_long_2v2.sh
+++ b/example/base_long_2v2.sh
+#!/bin/bash
+# Be sure to change/remove the seed for different experiments!
+./bin/HFO --offense-npcs=2 --defense-npcs=2 --trials 5000 --headless  --seed=1500348586 --no-logging --hfo-logging &
+# The magic line
+#   $$ holds the PID for this script
+#   Negation means kill by process group id instead of PID
+trap "kill -TERM -$$" SIGINT
+wait
--- a/example/defense_2v3.sh
+++ b/example/defense_2v3.sh
+#!/bin/bash
+# HAS TO BE RUN FROM EXAMPLE DIR DUE TO hand_coded_defense_agent CONFIG!
+../bin/HFO --offense-npcs=2 --defense-agents=1 --defense-npcs=2 --trials 20 --headless --port=7000 &
+# The below sleep period is needed to avoid the agent connecting in before the
+# Trainer.py script gets the base/Helios goalie connected in; if that happens,
+# the agent gets assigned unum 1 and there is a mixup in which agent is
+# supposed to be the goalie (some portions of the various programs go by unum,
+# others go by a goalie flag).
+sleep 15
+./hand_coded_defense_agent &> agent1.txt &
+sleep 5
+# The magic line
+#   $$ holds the PID for this script
+#   Negation means kill by process group id instead of PID
+trap "kill -TERM -$$" SIGINT
+wait
\ No newline at end of file
--- a/example/defense_long_2v2.sh
+++ b/example/defense_long_2v2.sh
+#!/bin/bash
+# HAS TO BE RUN FROM EXAMPLE DIR DUE TO hand_coded_defense_agent CONFIG!
+# Change to a new seed for different experiments!
+../bin/HFO --offense-npcs=2 --defense-agents=1 --defense-npcs=1 --trials 5000 --headless --port=7000 --seed 1500348586 --no-logging --hfo-logging &
+# This sleep is needed so the agent doesn't connect too soon and get assigned unum 1 (goalie)
+sleep 15
+./hand_coded_defense_agent &> agent1.txt &
+sleep 5
+# The magic line
+#   $$ holds the PID for this script
+#   Negation means kill by process group id instead of PID
+trap "kill -TERM -$$" SIGINT
+wait
\ No newline at end of file
--- a/example/defense_python_2v2.sh
+++ b/example/defense_python_2v2.sh
+#!/bin/bash
+./bin/HFO --offense-npcs=2 --defense-agents=1 --defense-npcs=1 --trials 20 --headless &
+# Sleep is needed to make sure doesn't get connected too soon, as unum 1 (goalie)
+sleep 15
+./example/hand_coded_defense_agent.py &> agent1.txt &
+sleep 5
+# The magic line
+#   $$ holds the PID for this script
+#   Negation means kill by process group id instead of PID
+trap "kill -TERM -$$" SIGINT
+wait
\ No newline at end of file
--- a/example/defense_python_2v3.sh
+++ b/example/defense_python_2v3.sh
+#!/bin/bash
+./bin/HFO --offense-npcs=2 --defense-agents=1 --defense-npcs=2 --trials 20 --headless &
+# Sleep is needed to make sure doesn't get connected too soon, as unum 1 (goalie)
+sleep 15
+./example/hand_coded_defense_agent.py &> agent1.txt &
+sleep 5
+# The magic line
+#   $$ holds the PID for this script
+#   Negation means kill by process group id instead of PID
+trap "kill -TERM -$$" SIGINT
+wait
\ No newline at end of file
--- a/example/defense_python_3v3.sh
+++ b/example/defense_python_3v3.sh
+#!/bin/bash
+./bin/HFO --offense-npcs=3 --defense-agents=1 --defense-npcs=2 --trials 20 --headless &
+# Sleep is needed to make sure doesn't get connected too soon, as unum 1 (goalie)
+sleep 15
+./example/hand_coded_defense_agent.py &> agent1.txt &
+sleep 5
+# The magic line
+#   $$ holds the PID for this script
+#   Negation means kill by process group id instead of PID
+trap "kill -TERM -$$" SIGINT
+wait
\ No newline at end of file
--- a/example/defense_python_long_2v2.sh
+++ b/example/defense_python_long_2v2.sh
+#!/bin/bash
+# Change to a different seed for different experiments!
+./bin/HFO --offense-npcs=2 --defense-agents=1 --defense-npcs=1 --trials 5000 --headless --seed 1500348586 --no-logging --hfo-logging &
+# Sleep is needed to make sure doesn't get connected too soon, as unum 1 (goalie)
+sleep 15
+./example/hand_coded_defense_agent.py &> agent1.txt &
+sleep 5
+# The magic line
+#   $$ holds the PID for this script
+#   Negation means kill by process group id instead of PID
+trap "kill -TERM -$$" SIGINT
+wait
\ No newline at end of file
--- a/example/hand_coded_defense_agent.cpp
+++ b/example/hand_coded_defense_agent.cpp
@@ -66,7 +66,7 @@ bool is_in_open_area(double pos_x, double pos_y) {
        }
 }
-action_with_params get_defense_action(const std::vector<float>& state_vec, double no_of_opponents, double numTMates) {
+action_with_params get_defense_action(const std::vector<float>& state_vec, double no_of_opponents, int numTMates) {
        int size_of_vec = 10 + 6*numTMates + 3*no_of_opponents;
        if (size_of_vec != state_vec.size()) {
                std :: cout <<"Invalid Feature Vector / Check the number of teammates/opponents provided";
@@ -221,7 +221,7 @@ int main(int argc, char** argv) {
    // Get the vector of state features for the current state
      const vector<float>& feature_vec = hfo.getState();
      if (random == 0) {
-              action_with_params a = get_defense_action(feature_vec, 2,1);
+              action_with_params a = get_defense_action(feature_vec, 2, hfo.getNumTeammates());
         // std::cout << a.action << a.param;
         if (a.action == hfo :: MARK_PLAYER || a.action == hfo::TACKLE) {
                  hfo.act(a.action, a.param);

--- a/example/hand_coded_defense_agent.py
+++ b/example/hand_coded_defense_agent.py
+#!/usr/bin/env python
+"""
+This is a hand-coded defense agent, using hand_coded_defense_agent.cpp as a starting point,
+that should be able to play, for instance, a 2v2 game against 2 offense npcs. It requires a goal
+keeper/goalie.
+"""
+from __future__ import print_function
+# encoding: utf-8
+# First Start the server: $> bin/start.py
+import argparse
+import itertools
+import math
+import random
+try:
+  import hfo
+except ImportError:
+  print('Failed to import hfo. To install hfo, in the HFO directory'\
+    ' run: \"pip install .\"')
+  exit()
+GOAL_POS_X = 1.0
+GOAL_POS_Y = 0.0
+# below - from hand_coded_defense_agent.cpp except LOW_KICK_DIST
+HALF_FIELD_WIDTH = 68 # y coordinate -34 to 34 (-34 = bottom, 34 = top); NOTE(review): 68 is the full span, not half - confirm against hand_coded_defense_agent.cpp
+HALF_FIELD_LENGTH = 52.5 # x coordinate 0 to 52.5 (0 = goalline 52.5 = center)
+params = {'KICK_DIST':(1.504052352*1), 'OPEN_AREA_HIGH_LIMIT_X':0.747311440447,
+          'TACKLE_DIST':(1.613456553*1), 'LOW_KICK_DIST':((5*5)/HALF_FIELD_LENGTH)}
+def get_dist_normalized(ref_x, ref_y, src_x, src_y):
+  """
+  Returns the Euclidean distance between (ref_x, ref_y) and (src_x, src_y),
+  with the y difference first scaled by HALF_FIELD_WIDTH/HALF_FIELD_LENGTH.
+  """
+  return math.sqrt(math.pow((ref_x - src_x),2) +
+                   math.pow(((HALF_FIELD_WIDTH/HALF_FIELD_LENGTH)*(ref_y - src_y)),2))
+##def is_kickable(ball_pos_x, ball_pos_y, kicker_pos_x, kicker_pos_y):
+##  return get_dist_normalized(ball_pos_x, ball_pos_y,
+##                             kicker_pos_x, kicker_pos_y) < params['KICK_DIST']
+def is_tackleable(agent_pos_x, agent_pos_y, ball_dist, opp_pos_x, opp_pos_y):
+  """
+  Returns True if the opponent is within params['TACKLE_DIST'] of the agent
+  (normalized distance) and ball_dist is below params['LOW_KICK_DIST'].
+  The caller in this file passes, as ball_dist, the distance from the ball
+  to the opponent nearest the ball.
+  """
+  return (get_dist_normalized(agent_pos_x,
+                              agent_pos_y,
+                              opp_pos_x,
+                              opp_pos_y) < params['TACKLE_DIST']) and (ball_dist <
+                                                                       params['LOW_KICK_DIST'])
+def ball_moving_toward_goal(ball_pos_x, ball_pos_y, old_ball_pos_x, old_ball_pos_y):
+  """
+  Returns True if the ball's normalized distance to the goal is less than
+  both params['KICK_DIST'] and the old ball position's normalized distance
+  to the goal.
+  """
+  return (get_dist_normalized(ball_pos_x, ball_pos_y,
+                              GOAL_POS_X, GOAL_POS_Y) < min(params['KICK_DIST'],
+                                                            get_dist_normalized(old_ball_pos_x,
+                                                                                old_ball_pos_y,
+                                                                                GOAL_POS_X,
+                                                                                GOAL_POS_Y)))
+def ball_nearer_to_goal(ball_pos_x, ball_pos_y, agent_pos_x, agent_pos_y):
+  """
+  Returns True if the ball's normalized distance to the goal is less than
+  both params['KICK_DIST'] and the agent's normalized distance to the goal.
+  """
+  return get_dist_normalized(ball_pos_x, ball_pos_y,
+                             GOAL_POS_X, GOAL_POS_Y) < min(params['KICK_DIST'],
+                                                           get_dist_normalized(agent_pos_x,
+                                                                               agent_pos_y,
+                                                                               GOAL_POS_X,
+                                                                               GOAL_POS_Y))
+def get_sorted_opponents(state_vec, num_opponents, num_teammates, pos_x, pos_y):
+  """
+  Returns a list of tuple(unum, dist, opp_pos_x, opp_pos_y),
+  sorted in increasing order of dist from the given position.
+  Opponents whose unum entry in state_vec is not positive are left out,
+  so the list may be shorter than num_opponents, or empty.
+  """
+  unum_list = []
+  for i in range(num_opponents):
+    # Opponent i's x, y and unum are read from the three consecutive entries
+    # starting at index 9+(6*num_teammates)+(3*i)+1.
+    unum = state_vec[9+(i*3)+(6*num_teammates)+3]
+    if unum > 0:
+      opp_pos_x = state_vec[9+(i*3)+(6*num_teammates)+1]
+      opp_pos_y = state_vec[9+(i*3)+(6*num_teammates)+2]
+      dist = get_dist_normalized(pos_x, pos_y, opp_pos_x, opp_pos_y)
+      unum_list.append(tuple([unum, dist, opp_pos_x, opp_pos_y]))
+    # otherwise, unknown
+  # Sorting is only needed when there is more than one entry
+  if len(unum_list) > 1:
+    return sorted(unum_list, key=lambda x: x[1])
+  return unum_list
+def is_in_open_area(pos_x, ignored_pos_y):
+  """
+  Returns True if pos_x is at or beyond params['OPEN_AREA_HIGH_LIMIT_X'];
+  the y coordinate is accepted but not used.
+  """
+  return pos_x >= params['OPEN_AREA_HIGH_LIMIT_X']
+def add_num_times(action, main_dict, opt_dict=None):
+  """
+  Increments the count for action in main_dict and, if opt_dict is given
+  and non-empty, in opt_dict as well. Returns action unchanged, so the call
+  can be used directly as the argument to hfo_env.act().
+  """
+  main_dict[action] += 1
+  # Truthiness test: None and an empty dict are both skipped
+  if opt_dict:
+    opt_dict[action] += 1
+  return action
+def do_defense_action(state_vec, hfo_env,
+                      num_opponents, num_teammates,
+                      old_ball_pos_x, old_ball_pos_y,
+                      num_times_overall, num_times_kickable,
+                      misc_tracked):
+  """
+  Figures out and does the (hopefully) best defense action.
+
+  Reads the agent and ball positions from state_vec, finds the known
+  opponents nearest the ball and nearest the goal, and calls hfo_env.act()
+  with the chosen action. Actions chosen after the validity check are
+  counted in num_times_overall (and also in num_times_kickable when
+  state_vec[5] > 0); misc_tracked['max_kickable_dist'] is updated in that
+  same case. Raises LookupError if state_vec is shorter than
+  10 + 6*num_teammates + 3*num_opponents.
+  """
+  min_vec_size = 10 + (6*num_teammates) + (3*num_opponents)
+  if (len(state_vec) < min_vec_size):
+    raise LookupError("Feature vector length is {0:d} not {1:d}".format(len(state_vec),
+                                                                        min_vec_size))
+  agent_pos_x = state_vec[0]
+  agent_pos_y = state_vec[1]
+  ball_pos_x = state_vec[3]
+  ball_pos_y = state_vec[4]
+  # if get high_level working for invalid
+  # (this MOVE is not counted in num_times_overall)
+  if (min(agent_pos_x,agent_pos_y,ball_pos_x,ball_pos_y) < -1):
+    hfo_env.act(hfo.MOVE) # will be Reorient in that version
+    return
+  ball_toward_goal = ball_moving_toward_goal(ball_pos_x, ball_pos_y,
+                                             old_ball_pos_x, old_ball_pos_y)
+  ball_nearer_goal = ball_nearer_to_goal(ball_pos_x, ball_pos_y,
+                                         agent_pos_x, agent_pos_y)
+  # Known opponents, nearest to the ball first
+  ball_sorted_list = get_sorted_opponents(state_vec, num_opponents, num_teammates,
+                                          pos_x=ball_pos_x, pos_y=ball_pos_y)
+  if not ball_sorted_list: # unknown opponent positions/unums
+    print("No known opponent locations (btg {0!r}; bng {1!r}; ".format(ball_toward_goal,
+                                                                       ball_nearer_goal) +
+          "ball xy {0:n}, {1:n}; ball old xy {2:n}, {3:n})".format(ball_pos_x,
+                                                                   ball_pos_y,
+                                                                   old_ball_pos_x,
+                                                                   old_ball_pos_y))
+    if ball_toward_goal:
+      if ball_nearer_goal:
+        hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
+      else:
+        hfo_env.act(add_num_times(hfo.INTERCEPT,num_times_overall))
+    else:
+      hfo_env.act(add_num_times(hfo.MOVE,num_times_overall))
+    return
+  # Known opponents, nearest to the goal first
+  goal_sorted_list = get_sorted_opponents(state_vec, num_opponents, num_teammates,
+                                          pos_x=GOAL_POS_X, pos_y=GOAL_POS_Y)
+  # Drop "toward goal" if an opponent is within LOW_KICK_DIST of the ball, or
+  # if an opponent is closer to the goal than the ball is
+  if ball_toward_goal:
+    if ball_sorted_list[0][1] < params['LOW_KICK_DIST']:
+      ball_toward_goal = False
+    elif goal_sorted_list[0][1] < get_dist_normalized(ball_pos_x,ball_pos_y,
+                                                      GOAL_POS_X,GOAL_POS_Y):
+      ball_toward_goal = False
+  is_tackleable_opp = is_tackleable(agent_pos_x, agent_pos_y,
+                                    ball_sorted_list[0][1],
+                                    ball_sorted_list[0][2], ball_sorted_list[0][3])
+  agent_to_ball_dist = get_dist_normalized(agent_pos_x, agent_pos_y,
+                                           ball_pos_x, ball_pos_y)
+  if state_vec[5] > 0: # kickable distance of player
+    misc_tracked['max_kickable_dist'] = max(agent_to_ball_dist,misc_tracked['max_kickable_dist'])
+    if is_tackleable_opp:
+      hfo_env.act(add_num_times(hfo.MOVE,num_times_overall,num_times_kickable)) # will do tackle
+    elif ball_nearer_goal:
+      hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall,num_times_kickable))
+    elif ball_toward_goal:
+      hfo_env.act(add_num_times(hfo.INTERCEPT,num_times_overall,num_times_kickable))
+    else:
+      hfo_env.act(add_num_times(hfo.GO_TO_BALL,num_times_overall,num_times_kickable))
+    return
+  # The opponent nearest the goal is not the opponent nearest the ball
+  if goal_sorted_list[0][0] != ball_sorted_list[0][0]:
+    if is_in_open_area(ball_sorted_list[0][2],
+                       ball_sorted_list[0][3]) and is_in_open_area(goal_sorted_list[0][2],
+                                                                   goal_sorted_list[0][3]):
+      if ball_sorted_list[0][1] < params['LOW_KICK_DIST']:
+        hfo_env.act(add_num_times(hfo.MARK_PLAYER,num_times_overall),
+                    goal_sorted_list[0][0])
+      elif agent_to_ball_dist < ball_sorted_list[0][1]:
+        if ball_nearer_goal:
+          hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
+        elif ball_toward_goal:
+          hfo_env.act(add_num_times(hfo.INTERCEPT,num_times_overall))
+        else:
+          hfo_env.act(add_num_times(hfo.GO_TO_BALL,num_times_overall))
+      else:
+        hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
+    elif ball_sorted_list[0][1] >= params['KICK_DIST']:
+      if agent_to_ball_dist < ball_sorted_list[0][1]:
+        if ball_nearer_goal:
+          hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
+        elif ball_toward_goal:
+          hfo_env.act(add_num_times(hfo.INTERCEPT,num_times_overall))
+        else:
+          hfo_env.act(add_num_times(hfo.GO_TO_BALL,num_times_overall))
+      else:
+        hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
+    elif is_tackleable_opp and (not is_in_open_area(ball_sorted_list[0][2],
+                                                    ball_sorted_list[0][3])):
+      hfo_env.act(add_num_times(hfo.MOVE,num_times_overall))
+    elif ball_sorted_list[0][1] < (1*params['LOW_KICK_DIST']):
+      hfo_env.act(add_num_times(hfo.MARK_PLAYER,num_times_overall),
+                  goal_sorted_list[0][0])
+    else:
+      hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
+    return
+  # From here on, the same opponent is nearest both the ball and the goal
+  if is_in_open_area(ball_sorted_list[0][2],ball_sorted_list[0][3]):
+    if ball_sorted_list[0][1] < params['KICK_DIST']:
+      hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
+    elif agent_to_ball_dist < params['KICK_DIST']:
+      if ball_nearer_goal:
+        hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
+      elif ball_toward_goal:
+        hfo_env.act(add_num_times(hfo.INTERCEPT,num_times_overall))
+      else:
+        hfo_env.act(add_num_times(hfo.GO_TO_BALL,num_times_overall))
+    elif is_tackleable_opp:
+      hfo_env.act(add_num_times(hfo.MOVE,num_times_overall))
+    else:
+      hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
+  else:
+    if ball_sorted_list[0][1] >= max(params['KICK_DIST'],agent_to_ball_dist):
+      if ball_nearer_goal:
+        hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
+      elif ball_toward_goal:
+        hfo_env.act(add_num_times(hfo.INTERCEPT,num_times_overall))
+      else:
+        hfo_env.act(add_num_times(hfo.GO_TO_BALL,num_times_overall))
+    elif ball_sorted_list[0][1] >= params['KICK_DIST']:
+      hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
+    elif is_tackleable_opp:
+      hfo_env.act(add_num_times(hfo.MOVE,num_times_overall))
+    else:
+      hfo_env.act(add_num_times(hfo.REDUCE_ANGLE_TO_GOAL,num_times_overall))
+  return
+def do_random_defense_action(state, hfo_env):
+  """
+  Does MOVE if state[5] > 0; otherwise does REDUCE_ANGLE_TO_GOAL with
+  probability 0.25 and MOVE the rest of the time.
+  """
+  if state[5] > 0: # kickable
+    hfo_env.act(hfo.MOVE)
+  else:
+    if random.random() < 0.25:
+      hfo_env.act(hfo.REDUCE_ANGLE_TO_GOAL)
+    else:
+      hfo_env.act(hfo.MOVE)
+  return
+def main():
+  """
+  Parses the command line, connects to the HFO server as a non-goalie on
+  'base_right', then plays episodes until the server goes down, at which
+  point the per-action counts are printed and the agent quits.
+  """
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--port', type=int, default=6000, help="Server port")
+  parser.add_argument('--seed', type=int, default=None,
+                      help="Python randomization seed; uses python default if 0 or not given")
+  parser.add_argument('--epsilon', type=float, default=0,
+                      help="Probability of a random action, to adjust difficulty")
+  parser.add_argument('--record', action='store_true',
+                      help="If doing HFO --record")
+  parser.add_argument('--rdir', type=str, default='log/',
+                      help="Set directory to use if doing --record")
+  args=parser.parse_args()
+  if args.seed:
+    random.seed(args.seed)
+  hfo_env = hfo.HFOEnvironment()
+  # record_dir is only passed when --record is given
+  if args.record:
+    hfo_env.connectToServer(hfo.HIGH_LEVEL_FEATURE_SET,
+                            'bin/teams/base/config/formations-dt', args.port,
+                            'localhost', 'base_right', play_goalie=False,
+                            record_dir=args.rdir)
+  else:
+    hfo_env.connectToServer(hfo.HIGH_LEVEL_FEATURE_SET,
+                            'bin/teams/base/config/formations-dt', args.port,
+                            'localhost', 'base_right', play_goalie=False)
+  numTeammates = hfo_env.getNumTeammates()
+  numOpponents = hfo_env.getNumOpponents()
+  if args.seed:
+    if args.epsilon > 0:
+      print("Python randomization seed: {0:d}".format(args.seed))
+    else:
+      print("Python randomization seed useless without --epsilon >0")
+  if args.epsilon > 0:
+    print("Using epsilon {0:n}".format(args.epsilon))
+  my_unum = hfo_env.getUnum()
+  assert ((my_unum > 1) and (my_unum <= 11)), "Bad unum {!r}".format(my_unum)
+  print("My unum is {0:d}".format(my_unum))
+  # Per-action counters, kept across all episodes
+  num_times_overall = {}
+  num_times_kickable = {}
+  for action in range(hfo.NUM_HFO_ACTIONS):
+    num_times_overall[action] = 0
+    num_times_kickable[action] = 0
+  misc_tracked = {'max_kickable_dist':0}
+  for episode in itertools.count():
+    old_ball_pos_x = -1
+    old_ball_pos_y = 0
+    episode_start = True
+    status = hfo.IN_GAME
+    while status == hfo.IN_GAME:
+      state = hfo_env.getState()
+      # On the first step of an episode, take the "old" ball position from
+      # the current state where that coordinate is within [-1, 1]
+      if episode_start:
+        if (state[3] >= -1) and (state[3] <= 1):
+          old_ball_pos_x = state[3]
+        if (state[4] >= -1) and (state[4] <= 1):
+          old_ball_pos_y = state[4]
+        episode_start = False
+      if (args.epsilon > 0) and (random.random() < args.epsilon):
+        do_random_defense_action(state, hfo_env)
+      else:
+        do_defense_action(state_vec=state, hfo_env=hfo_env,
+                          num_opponents=numOpponents, num_teammates=numTeammates,
+                          old_ball_pos_x=old_ball_pos_x, old_ball_pos_y=old_ball_pos_y,
+                          num_times_overall=num_times_overall,
+                          num_times_kickable=num_times_kickable,
+                          misc_tracked=misc_tracked)
+      old_ball_pos_x=state[3]
+      old_ball_pos_y=state[4]
+      status=hfo_env.step()
+      #print(status)
+    # Quit if the server goes down
+    if status == hfo.SERVER_DOWN:
+      # Only actions that were used at least once are reported
+      for action in range(hfo.NUM_HFO_ACTIONS):
+        if num_times_overall[action]:
+          print("Overall times {0!s}: {1:d}".format(hfo_env.actionToString(action),
+                                                    num_times_overall[action]))
+      for action in range(hfo.NUM_HFO_ACTIONS):
+        if num_times_kickable[action]:
+          print("Kickable times {0!s}: {1:d}".format(hfo_env.actionToString(action),
+                                                     num_times_kickable[action]))
+      print("Max kickable dist: {0:n}".format(misc_tracked['max_kickable_dist']))
+      hfo_env.act(hfo.QUIT)
+      exit()
+    # Check the outcome of the episode
+    print("Episode {0:d} ended with {1:s}".format(episode,
+                                                  hfo_env.statusToString(status)))
+if __name__ == '__main__':
+  main()
--- a/example/high_level_custom_agent.py
+++ b/example/high_level_custom_agent.py
 #!/usr/bin/env python
+from __future__ import print_function
 # encoding: utf-8
 #MODIFIED#
 # First Start the server: $> bin/start.py
-import random, threading, argparse
+import argparse
 import itertools
+import random
 try:
-  from hfo import *
+  import hfo
-except:
+except ImportError:
  print('Failed to import hfo. To install hfo, in the HFO directory'\
    ' run: \"pip install .\"')
  exit()
@@ -16,81 +18,117 @@ params = {'SHT_DST':0.136664020547, 'SHT_ANG':-0.747394386098,
          'PASS_ANG':0.464086704478, 'DRIB_DST':-0.999052871962}
 def can_shoot(goal_dist, goal_angle):
-  """Returns True if if player can have a good shot at goal"""
+  """Returns True if the player may have a good shot at the goal"""
-  if goal_dist < params['SHT_DST'] and goal_angle > params['SHT_ANG']:
+  return bool((goal_dist < params['SHT_DST']) and (goal_angle > params['SHT_ANG']))
-    return True
-  else:
-    return False
 def has_better_pos(dist_to_op, goal_angle, pass_angle, curr_goal_angle):
  """Returns True if teammate is in a better attacking position"""
-  if curr_goal_angle > goal_angle or dist_to_op<params['DRIB_DST']:
+  if (curr_goal_angle > goal_angle) or (dist_to_op < params['DRIB_DST']):
    return False
  if pass_angle < params['PASS_ANG']:
    return False
  return True
-def can_dribble(dist_to_op):
+def get_action(state,hfo_env,num_teammates,rand_pass):
-  if dist_to_op > params['DRIB_DST']:
+  """Decides and performs the action to be taken by the agent."""
-    return True
-  else:
-    return False
-def get_action(state,hfo_env,num_teammates):
-  """Returns the action to be taken by the agent"""
  goal_dist = float(state[6])
  goal_op_angle = float(state[8])
  if can_shoot(goal_dist, goal_op_angle):
-    hfo_env.act(SHOOT)
+    hfo_env.act(hfo.SHOOT)
    return
-  for i in range(num_teammates):
+  team_list = list(range(num_teammates))
+  if rand_pass and (num_teammates > 1):
+    random.shuffle(team_list)
+  for i in team_list:
    teammate_uniform_number=state[10 + 3*num_teammates + 3*i +2]
    if has_better_pos(dist_to_op = float(state[10 + num_teammates + i]),
                      goal_angle = float(state[10 + i]),
                      pass_angle = float(state[10 + 2*num_teammates + i]),
                      curr_goal_angle = goal_op_angle):
-      hfo_env.act(PASS, teammate_uniform_number)
+      hfo_env.act(hfo.PASS, teammate_uniform_number)
      return
-  # not sure if below check is needed - doDribble in agent.cpp includes
+  # no check for can_dribble is needed; doDribble in agent.cpp includes
-  # (via doPreprocess) doForceKick, which may cover this situation depending
+  # (via doPreprocess) doForceKick, which will cover this situation since
-  # on what existKickableOpponent returns.
+  # existKickableOpponent is based on distance.
-  if can_dribble(dist_to_op = state[9]):
+  hfo_env.act(hfo.DRIBBLE)
-    hfo_env.act(DRIBBLE)
  return
-  # If nothing can be done, do not do anything
-  hfo_env.act(NOOP)
 def main():
  parser = argparse.ArgumentParser()
-  parser.add_argument('--port', type=int, default=6000)
+  parser.add_argument('--port', type=int, default=6000, help="Server port")
-  parser.add_argument('--numTeammates', type=int, default=0)
+  parser.add_argument('--seed', type=int, default=None,
-  parser.add_argument('--numOpponents', type=int, default=1)
+                      help="Python randomization seed; uses python default if 0 or not given")
+  parser.add_argument('--rand-pass', action="store_true",
+                      help="Randomize order of checking teammates for a possible pass")
+  parser.add_argument('--epsilon', type=float, default=0,
+                      help="Probability of a random action if has the ball, to adjust difficulty")
+  parser.add_argument('--record', action='store_true',
+                      help="If doing HFO --record")
+  parser.add_argument('--rdir', type=str, default='log/',
+                      help="Set directory to use if doing --record")
  args=parser.parse_args()
-  hfo_env = HFOEnvironment()
+  if args.seed:
-  hfo_env.connectToServer(HIGH_LEVEL_FEATURE_SET,
+    random.seed(args.seed)
+  hfo_env = hfo.HFOEnvironment()
+  if args.record:
+    hfo_env.connectToServer(hfo.HIGH_LEVEL_FEATURE_SET,
+                            'bin/teams/base/config/formations-dt', args.port,
+                            'localhost', 'base_left', False,
+                            record_dir=args.rdir)
+  else:
+    hfo_env.connectToServer(hfo.HIGH_LEVEL_FEATURE_SET,
                            'bin/teams/base/config/formations-dt', args.port,
                            'localhost', 'base_left', False)
-  #itertools.count() counts forever
+  num_teammates = hfo_env.getNumTeammates()
+  #num_opponents = hfo_env.getNumOpponents()
+  if args.seed:
+    if (args.rand_pass and (num_teammates > 1)) or (args.epsilon > 0):
+      print("Python randomization seed: {0:d}".format(args.seed))
+    else:
+      print("Python randomization seed useless without --rand-pass w/2+ teammates or --epsilon >0")
+  if args.rand_pass and (num_teammates > 1):
+    print("Randomizing order of checking for a pass")
+  if args.epsilon > 0:
+    print("Using epsilon {0:n}".format(args.epsilon))
  for episode in itertools.count():
-    status=IN_GAME
+    num_eps = 0
-    count=0
+    num_had_ball = 0
-    while status==IN_GAME:
+    num_move = 0
+    status = hfo.IN_GAME
+    while status == hfo.IN_GAME:
      state = hfo_env.getState()
      #print(state)
      if int(state[5]) == 1: # state[5] is 1 when player has the ball
-        tmp = get_action(state,hfo_env,args.numTeammates)  
+        if (args.epsilon > 0) and (random.random() < args.epsilon):
-        #print(tmp)
+          if random.random() < 0.5:
-        #hfo_env.act(tmp)
+            hfo_env.act(hfo.SHOOT)
          else:
-        hfo_env.act(MOVE)
+            hfo_env.act(hfo.DRIBBLE)
+          num_eps += 1
+        else:
+          get_action(state,hfo_env,num_teammates,args.rand_pass)
+        num_had_ball += 1
+      else:
+        hfo_env.act(hfo.MOVE)
+        num_move += 1
      status=hfo_env.step()
      #print(status)
-      if status == SERVER_DOWN:
-        hfo_env.act(QUIT)
+    # Quit if the server goes down
+    if status == hfo.SERVER_DOWN:
+      hfo_env.act(hfo.QUIT)
      exit()
+    # Check the outcome of the episode
+    print("Episode {0:d} ended with {1:s}".format(episode,
+                                                  hfo_env.statusToString(status)))
+    if args.epsilon > 0:
+      print("\tNum move: {0:d}; Random action: {1:d}; Nonrandom: {2:d}".format(num_move,
+                                                                               num_eps,
+                                                                               (num_had_ball-
+                                                                                num_eps)))
 if __name__ == '__main__':
  main()
--- a/example/high_level_random_agent.py
+++ b/example/high_level_random_agent.py
@@ -4,34 +4,64 @@
 # Before running this program, first Start HFO server:
 # $> ./bin/HFO --offense-agents 1
-import random, itertools
+import argparse
-from hfo import *
+import itertools
+import random
+try:
+  import hfo
+except ImportError:
+  print('Failed to import hfo. To install hfo, in the HFO directory'\
+    ' run: \"pip install .\"')
+  exit()
 def main():
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--port', type=int, default=6000,
+                      help="Server port")
+  parser.add_argument('--seed', type=int, default=None,
+                      help="Python randomization seed; uses python default if 0 or not given")
+  parser.add_argument('--record', action='store_true',
+                      help="Doing HFO --record")
+  parser.add_argument('--rdir', type=str, default='log/',
+                      help="Set directory to use if doing HFO --record")
+  args=parser.parse_args()
+  if args.seed:
+    random.seed(args.seed)
  # Create the HFO Environment
-  hfo = HFOEnvironment()
+  hfo_env = hfo.HFOEnvironment()
  # Connect to the server with the specified
  # feature set. See feature sets in hfo.py/hfo.hpp.
-  hfo.connectToServer(HIGH_LEVEL_FEATURE_SET,
+  if args.record:
-                      'bin/teams/base/config/formations-dt', 6000,
+    hfo_env.connectToServer(hfo.HIGH_LEVEL_FEATURE_SET,
+                            'bin/teams/base/config/formations-dt', args.port,
+                            'localhost', 'base_left', False,
+                            record_dir=args.rdir)
+  else:
+    hfo_env.connectToServer(hfo.HIGH_LEVEL_FEATURE_SET,
+                            'bin/teams/base/config/formations-dt', args.port,
                          'localhost', 'base_left', False)
  for episode in itertools.count():
-    status = IN_GAME
+    status = hfo.IN_GAME
-    while status == IN_GAME:
+    while status == hfo.IN_GAME:
      # Get the vector of state features for the current state
-      state = hfo.getState()
+      state = hfo_env.getState()
      # Perform the action
      if state[5] == 1: # State[5] is 1 when the player can kick the ball
-        hfo.act(random.choice([SHOOT, DRIBBLE]))
+        if random.random() < 0.5: # more efficient than random.choice for 2
+          hfo_env.act(hfo.SHOOT)
        else:
-        hfo.act(MOVE)
+          hfo_env.act(hfo.DRIBBLE)
+      else:
+        hfo_env.act(hfo.MOVE)
      # Advance the environment and get the game status
-      status = hfo.step()
+      status = hfo_env.step()
    # Check the outcome of the episode
-    print(('Episode %d ended with %s'%(episode, hfo.statusToString(status))))
+    print(('Episode %d ended with %s'%(episode,
+                                       hfo_env.statusToString(status))))
    # Quit if the server goes down
-    if status == SERVER_DOWN:
+    if status == hfo.SERVER_DOWN:
-      hfo.act(QUIT)
+      hfo_env.act(hfo.QUIT)
      exit()
 if __name__ == '__main__':

--- a/example/python_agents_3v3.sh
+++ b/example/python_agents_3v3.sh
@@ -3,9 +3,9 @@
 ./bin/HFO --offense-agents=2 --defense-npcs=3 --offense-npcs=1 --trials 20 --headless &
 sleep 5
 # -x is needed to skip the first line; otherwise whichever Python version is the default will run
-python2.7 -x ./example/high_level_custom_agent.py --numTeammates=2 --numOpponents=3 --port 6000 &> agent1.txt &
+python2.7 -x ./example/high_level_custom_agent.py --port 6000 &> agent1.txt &
 sleep 5
-python3 -x ./example/high_level_custom_agent.py --numTeammates=2 --numOpponents=3 --port 6000 &> agent2.txt &
+python3 -x ./example/high_level_custom_agent.py --port 6000 &> agent2.txt &
 # The magic line
 #   $$ holds the PID for this script

--- a/example/python_agents_eps_3v3.sh
+++ b/example/python_agents_eps_3v3.sh
+#!/bin/bash
+./bin/HFO --offense-agents=2 --defense-npcs=3 --offense-npcs=1 --trials 20 --headless &
+sleep 5
+# -x is needed to skip the first line; otherwise whichever Python version is the default will run
+python2.7 -x ./example/high_level_custom_agent.py --eps 0.2 --numTeammates=2 --numOpponents=3 --port 6000 &> agent1.txt &
+sleep 5
+python3 -x ./example/high_level_custom_agent.py --eps 0.2 --numTeammates=2 --numOpponents=3 --port 6000 &> agent2.txt &
+# The magic line
+#   $$ holds the PID for this script
+#   Negation means kill by process group id instead of PID
+trap "kill -TERM -$$" SIGINT
+wait
--- a/example/random_python_2v1.sh
+++ b/example/random_python_2v1.sh
+#!/bin/bash
+./bin/HFO --offense-agents=2 --defense-npcs=1 --trials 20 --headless &
+sleep 5
+python2.7 -x example/high_level_random_agent.py --port 6000 &> agent1.txt &
+sleep 5
+python3 -x example/high_level_random_agent.py --port 6000 &> agent2.txt &
+# The magic line
+#   $$ holds the PID for this script
+#   Negation means kill by process group id instead of PID
+trap "kill -TERM -$$" SIGINT
+wait
--- a/hfo/hfo.py
+++ b/hfo/hfo.py
@@ -79,6 +79,10 @@ hfo_lib.statusToString.argtypes = [c_int]
 hfo_lib.statusToString.restype = c_char_p
 hfo_lib.getUnum.argtypes = [c_void_p]
 hfo_lib.getUnum.restype = c_int
+hfo_lib.getNumTeammates.argtypes = [c_void_p]
+hfo_lib.getNumTeammates.restype = c_int
+hfo_lib.getNumOpponents.argtypes = [c_void_p]
+hfo_lib.getNumOpponents.restype = c_int
 class HFOEnvironment(object):
  def __init__(self):
@@ -154,3 +158,11 @@ class HFOEnvironment(object):
  def getUnum(self):
    """ Return the uniform number of the agent """
    return hfo_lib.getUnum(self.obj)
+  def getNumTeammates(self):
+    """ Returns the number of teammates of the agent """
+    return hfo_lib.getNumTeammates(self.obj)
+  def getNumOpponents(self):
+    """ Returns the number of opponents of the agent """
+    return hfo_lib.getNumOpponents(self.obj)
--- a/hfo/hfo_c_wrapper.h
+++ b/hfo/hfo_c_wrapper.h
@@ -48,6 +48,8 @@ extern "C" {
    return StatusToString(status).c_str();
  }
  int getUnum(hfo::HFOEnvironment *hfo) {return hfo->getUnum();}
+  int getNumTeammates(hfo::HFOEnvironment *hfo) {return hfo->getNumTeammates();}
+  int getNumOpponents(hfo::HFOEnvironment *hfo) {return hfo->getNumOpponents();}
 }
 #endif
--- a/src/HFO.cpp
+++ b/src/HFO.cpp
@@ -115,6 +115,14 @@ int HFOEnvironment::getUnum() {
  return agent->getUnum();
 }
+int HFOEnvironment::getNumTeammates() {
+  return agent->getNumTeammates();
+}
+int HFOEnvironment::getNumOpponents() {
+  return agent->getNumOpponents();
+}
 Player HFOEnvironment::playerOnBall() {
  return agent->getPlayerOnBall();
 }

--- a/src/HFO.hpp
+++ b/src/HFO.hpp
@@ -49,6 +49,12 @@ class HFOEnvironment {
  // Returns the uniform number of the player
  virtual int getUnum();
+  // Returns the number of teammates
+  virtual int getNumTeammates();
+  // Returns the number of opponents
+  virtual int getNumOpponents();
  // Get the current player holding the ball
  virtual Player playerOnBall();

--- a/src/agent.cpp
+++ b/src/agent.cpp
@@ -212,16 +212,16 @@ bool Agent::initImpl(CmdLineParser & cmd_parser) {
 }
 FeatureExtractor* Agent::getFeatureExtractor(feature_set_t feature_set_indx,
-                                             int num_teammates,
+                                             int numTeammates,
-                                             int num_opponents,
+                                             int numOpponents,
                                             bool playing_offense) {
  switch (feature_set_indx) {
    case LOW_LEVEL_FEATURE_SET:
-      return new LowLevelFeatureExtractor(num_teammates, num_opponents,
+      return new LowLevelFeatureExtractor(numTeammates, numOpponents,
                                          playing_offense);
      break;
    case HIGH_LEVEL_FEATURE_SET:
-      return new HighLevelFeatureExtractor(num_teammates, num_opponents,
+      return new HighLevelFeatureExtractor(numTeammates, numOpponents,
                                           playing_offense);
      break;
    default:
@@ -336,9 +336,9 @@ Agent::ProcessTrainerMessages()
      hfo::Config hfo_config;
      if (hfo::ParseConfig(message, hfo_config)) {
        bool playing_offense = world().ourSide() == rcsc::LEFT;
-        int num_teammates = playing_offense ?
+        num_teammates = playing_offense ?
 	  hfo_config.num_offense - 1 : hfo_config.num_defense - 1;
-        int num_opponents = playing_offense ?
+        num_opponents = playing_offense ?
 	  hfo_config.num_defense : hfo_config.num_offense;
        feature_extractor = getFeatureExtractor(
            feature_set, num_teammates, num_opponents, playing_offense);
@@ -804,10 +804,12 @@ bool Agent::doMarkPlayer(int unum) {
  int count = 0;
  for ( PlayerPtrCont::const_iterator it = wm.opponentsFromSelf().begin(); it != o_end; ++it ) {
      if ( (*it)->distFromBall() < 5 ) {
+	if ((kicker_unum == -1) || (kicker_unum != unum)) { // try to obey action instruction
          kicker_pos = (*it)->pos();
          kicker_unum = (*it)->unum();
 	}
      }
+  }
  for ( PlayerPtrCont::const_iterator it = wm.opponentsFromSelf().begin(); it !=  o_end; ++it ) {
 	  if ( (*it)-> unum() == unum ) {

--- a/src/agent.h
+++ b/src/agent.h
@@ -61,6 +61,8 @@ protected:
  std::string say_msg, hear_msg;       // Messages to/from teammates
  hfo::action_t requested_action;      // Currently requested action
  std::vector<float> params;           // Parameters of current action
+  int num_teammates;                   // Number of teammates
+  int num_opponents;                   // Number of opponents
 public:
  inline const std::vector<float>& getState() { return state; }
@@ -68,6 +70,8 @@ protected:
  inline const hfo::Player& getPlayerOnBall() { return player_on_ball; }
  inline const std::string& getHearMsg() { return hear_msg; }
  int getUnum(); // Returns the uniform number of the player
+  inline int getNumTeammates() { return num_teammates; }
+  inline int getNumOpponents() { return num_opponents; }
  inline void setFeatureSet(hfo::feature_set_t fset) { feature_set = fset; }
  inline std::vector<float>* mutable_params() { return &params; }

--- a/tests/test_with_server.py
+++ b/tests/test_with_server.py
@@ -53,6 +53,12 @@ def test_with_server():
        print("My unum is {!s}".format(my_unum))
+        num_teammates = hfo_env.getNumTeammates()
+        assert (num_teammates == 2), "Wrong num teammates ({!r})".format(num_teammates)
+        num_opponents = hfo_env.getNumOpponents()
+        assert (num_opponents == 2), "Wrong num opponents ({!r})".format(num_opponents)
        had_ok_unum = False
        had_ok_unum_set_my_side = set()
        had_ok_unum_set_their_side = set();

--- a/tests/test_with_server_fullstate.py
+++ b/tests/test_with_server_fullstate.py
@@ -54,6 +54,12 @@ def test_with_server():
        print("My unum is {!s}".format(my_unum))
+        num_teammates = hfo_env.getNumTeammates()
+        assert (num_teammates == 2), "Wrong num teammates ({!r})".format(num_teammates)
+        num_opponents = hfo_env.getNumOpponents()
+        assert (num_opponents == 2), "Wrong num opponents ({!r})".format(num_opponents)
        had_ok_unum = False
        had_ok_unum_set_my_side = set()
        had_ok_unum_set_their_side = set();