Polished the agent server interface.

7a4eddf1 · Matthew Hausknecht · 44277b38 · 7a4eddf1 · 7a4eddf1 · 7a4eddf1
Commit 7a4eddf1 authored Mar 09, 2015 by Matthew Hausknecht
11 changed files
--- a/HFO.py
+++ b/HFO.py
 import socket, struct, thread, time

-class Actions:
+class HFO_Actions:
  ''' An enum of the possible HFO actions

  Dash(power, relative_direction)
@@ -9,7 +9,11 @@ class Actions:
  Kick(power, direction)

  '''
-  DASH, TURN, TACKLE, KICK = range(4)
+  DASH, TURN, TACKLE, KICK, QUIT = range(5)
+
+class HFO_Status:
+  ''' Current status of the HFO game. '''
+  IN_GAME, GOAL, CAPTURED_BY_DEFENSE, OUT_OF_BOUNDS, OUT_OF_TIME = range(5)


 class HFOEnvironment(object):
@@ -17,11 +21,9 @@ class HFOEnvironment(object):
  between a learning agent and the Half-Field-Offense domain.

  '''
-
  def __init__(self):
    self.socket = None # Socket connection to server
    self.numFeatures = None # Given by the server in handshake
-    self.actions = ['DASH', 'TURN', 'TACKLE', 'KICK']

  def connectToAgentServer(self, server_port=6008):
    '''Connect to the server that controls the agent on the specified port. '''
@@ -51,6 +53,10 @@ class HFOEnvironment(object):
    self.numFeatures = struct.unpack("i", data)[0]
    # Send what we recieved
    self.socket.send(struct.pack("i", self.numFeatures))
+    # Get the current game status
+    data = self.socket.recv(struct.calcsize("i"))
+    status = struct.unpack("i", data)[0]
+    assert status == HFO_Status.IN_GAME, "Status check failed"
    print '[Agent Client] Handshake complete'

  def getState(self):
@@ -65,11 +71,14 @@ class HFOEnvironment(object):
    return features

  def act(self, action):
-    ''' Send an action and recieve the resulting reward from the environment.'''
+    ''' Send an action and recieve the game status.'''
    self.socket.send(struct.pack("iff", *action))
-    # TODO: Get the rewards from the domain
-    return 0
+    # Get the current game status
+    data = self.socket.recv(struct.calcsize("i"))
+    status = struct.unpack("i", data)[0]
+    return status

  def cleanup(self):
-    ''' Close the connection to the agent's server. '''
+    ''' Send a quit and close the connection to the agent's server. '''
+    self.socket.send(struct.pack("i", HFO_Actions.QUIT))
    self.socket.close()
--- a/README.md
+++ b/README.md
@@ -26,3 +26,7 @@ By default if your agent takes longer then two seconds to select an action it wi
 ```bash
 ./bin/start.py
 ```
+and in a separate terminal
+```bash
+./examples/hfo_example_agent
+```
--- a/bin/Trainer.py
+++ b/bin/Trainer.py
@@ -97,10 +97,12 @@ class Trainer(object):
    self._agentNumExt = self.convertToExtPlayer(self._agentTeam,
                                                self._agentNumInt)
    agentCmd = 'start_agent.sh -t %s -u %i --numTeammates %i --numOpponents %i'\
-               %(self._agentTeam, self._agentNumExt, numTeammates, numOpponents)
+               ' --playingOffense %i'\
+               %(self._agentTeam, self._agentNumExt, numTeammates, numOpponents,
+                 self._agent_play_offense)
    agentCmd = agentCmd.split(' ')
    # Ignore stderr because librcsc continually prints to it
-    kwargs = {}#'stderr':open('/dev/null','w')}
+    kwargs = {'stderr':open('/dev/null','w')}
    p = subprocess.Popen(agentCmd, **kwargs)
    p.wait()
    with open('/tmp/start%i' % p.pid,'r') as f:
@@ -470,7 +472,7 @@ class Trainer(object):
    self.resetPlayerPositions()
    self.send('(recover)')
    self.send('(change_mode play_on)')
-    self.send('(say RESET)')
+    # self.send('(say RESET)')

  def resetBallPosition(self):
    """Reset the position of the ball for a new HFO trial. """
@@ -555,16 +557,20 @@ class Trainer(object):
    if self.isGoal():
      self._numGoals += 1
      result = 'Goal'
+      self.send('(say GOAL)')
    elif self.isOOB():
      self._numBallsOOB += 1
      result = 'Out of Bounds'
+      self.send('(say OUT_OF_BOUNDS)')
    elif team_holding_ball not in [None,self._offenseTeamInd]:
      self._numBallsCaptured += 1
      result = 'Defense Captured'
+      self.send('(say CAPTURED_BY_DEFENSE)')
    elif self._frame - self._lastFrameBallTouched > self.UNTOUCHED_LENGTH:
      self._lastFrameBallTouched = self._frame
      self._numOutOfTime += 1
      result = 'Ball untouched for too long'
+      self.send('(say OUT_OF_TIME)')
    else:
      print '[Trainer] Error: Unable to detect reason for End of Trial!'
      sys.exit(1)

--- a/bin/start.py
+++ b/bin/start.py
@@ -2,7 +2,7 @@
 # encoding: utf-8

 import subprocess, os, time, numpy, sys
-from signal import SIGINT
+from signal import SIGKILL

 # Global list of all/essential running processes
 processes, necProcesses = [], []
@@ -72,11 +72,11 @@ def main(args, team1='left', team2='right', rng=numpy.random.RandomState()):
    # Run HFO
    trainer.run(necProcesses)
  except KeyboardInterrupt:
-    print 'Exiting for CTRL-C'
+    print '[start.py] Exiting for CTRL-C'
  finally:
    for p in processes:
      try:
-        p.send_signal(SIGINT)
+        p.send_signal(SIGKILL)
      except:
        pass
      time.sleep(0.1)

--- a/bin/start_agent.sh
+++ b/bin/start_agent.sh
@@ -76,6 +76,7 @@ usage()
   echo "  --teammate STR               name of teammates"
   echo "  --numTeammates NUM           number of teammates"
   echo "  --numOpponents NUM           number of opponents"
+   echo "  --playingOffense [0|1]       are we playing offense or defense"
   echo "  --seed NUM                   seed for rng"
   echo "  --gdb                        runs with gdb on (default:off)"
   ) 1>&2
@@ -255,6 +256,15 @@ do
      shift 1
      ;;

+    --playingOffense)
+      if [ $# -lt 2 ]; then
+        usage
+        exit 1
+      fi
+      opts="${opts} --playingOffense ${2}"
+      shift 1
+      ;;
+
    --seed)
      if [ $# -lt 2 ]; then
        usage

--- a/example/hfo_example_agent.cpp
+++ b/example/hfo_example_agent.cpp
@@ -4,21 +4,42 @@

 using namespace std;

-// First Start the server by calling start.py in bin
+// First Start the server: $> bin/start.py

 int main() {
  // Create the HFO environment
  HFOEnvironment hfo;
-  // Connect the agent's server which should be listening if
-  // ./bin/start.py was called.
+  // Connect the agent's server
  hfo.connectToAgentServer();
-  // Continue until finished
-  while (true) {
+  // Play 5 episodes
+  for (int episode=0; episode<5; episode++) {
+    hfo_status_t status = IN_GAME;
+    while (status == IN_GAME) {
      // Grab the vector of state features for the current state
      const std::vector<float>& feature_vec = hfo.getState();
      // Create a dash action
      Action a = {DASH, 100., 0.};
      // Perform the dash and recieve the reward
-    float reward = hfo.act(a);
+      status = hfo.act(a);
+    }
+    // Check what the outcome of the episode was
+    cout << "Episode " << episode << " ended with status: ";
+    switch (status) {
+      case GOAL:
+        cout << "goal" << endl;
+        break;
+      case CAPTURED_BY_DEFENSE:
+        cout << "captured by defense" << endl;
+        break;
+      case OUT_OF_BOUNDS:
+        cout << "out of bounds" << endl;
+        break;
+      case OUT_OF_TIME:
+        cout << "out of time" << endl;
+        break;
+      default:
+        cout << "Unknown status " << status << endl;
+        exit(1);
+    }
  }
 };
--- a/example/hfo_example_agent.py
+++ b/example/hfo_example_agent.py
@@ -3,21 +3,42 @@

 import imp

-# First Start the server by calling start.py in bin
+# First Start the server: $> bin/start.py

 if __name__ == '__main__':
  # Load the HFO library
+  try:
    hfo_module = imp.load_source('HFO', '../HFO.py')
+  except:
+    hfo_module = imp.load_source('HFO', 'HFO.py')
  # Get the possible actions
-  actions = hfo_module.Actions
+  HFO_Actions = hfo_module.HFO_Actions
+  # Get the possible outcomes
+  HFO_Status = hfo_module.HFO_Status
  # Create the HFO Environment
  hfo = hfo_module.HFOEnvironment()
+  # Connect to the agent server
  hfo.connectToAgentServer()
-  # Continue until finished
-  while True:
+  # Play 5 episodes
+  for episode in xrange(5):
+    status = HFO_Status.IN_GAME
+    while status == HFO_Status.IN_GAME:
      # Grab the state features from the environment
      features = hfo.getState()
      # Take an action and get the reward
-    reward = hfo.act((actions.KICK, 100, 12.3))
+      status = hfo.act((HFO_Actions.KICK, 100, 12.3))
+    print 'Episode', episode, 'ended with',
+    # Check what the outcome of the episode was
+    if status == HFO_Status.GOAL:
+      print 'goal'
+    elif status == HFO_Status.CAPTURED_BY_DEFENSE:
+      print 'captured by defense'
+    elif status == HFO_Status.OUT_OF_BOUNDS:
+      print 'out of bounds'
+    elif status == HFO_Status.OUT_OF_TIME:
+      print 'out of time'
+    else:
+      print 'Unknown status', status
+      exit()
  # Cleanup when finished
  hfo.cleanup()
--- a/include/HFO.hpp
+++ b/include/HFO.hpp
@@ -23,9 +23,8 @@ class HFOEnvironment {
  //   TACKLE, // Tackle(direction)
  //   KICK    // Kick(power, direction)
  // };
-
-  // Take an action and recieve the resulting reward.
-  float act(Action action);
+  // Take an action and receive the resulting game status
+  hfo_status_t act(Action action);

 protected:
  int numFeatures; // The number of features in this domain

--- a/src/HFO.cpp
+++ b/src/HFO.cpp
@@ -16,6 +16,11 @@ void error(const char *msg) {

 HFOEnvironment::HFOEnvironment() {}
 HFOEnvironment::~HFOEnvironment() {
+  // Send a quit action and close the connection to the agent's server
+  action_t quit = QUIT;
+  if (send(sockfd, &quit, sizeof(int), 0) < 0) {
+    error("[Agent Client] ERROR sending from socket");
+  }
  close(sockfd);
 }

@@ -69,6 +74,15 @@ void HFOEnvironment::handshakeAgentServer() {
  if (send(sockfd, &numFeatures, sizeof(int), 0) < 0) {
    error("[Agent Client] ERROR sending from socket");
  }
+  // Recieve the game status
+  hfo_status_t status;
+  if (recv(sockfd, &status, sizeof(hfo_status_t), 0) < 0) {
+    error("[Agent Client] ERROR recv from socket");
+  }
+  if (status != IN_GAME) {
+    std::cout << "[Agent Client] Handshake failed: status check." << std::endl;
+    exit(1);
+  }
  std::cout << "[Agent Client] Handshake complete" << std::endl;
 }

@@ -82,9 +96,15 @@ const std::vector<float>& HFOEnvironment::getState() {
  return feature_vec;
 }

-float HFOEnvironment::act(Action action) {
+hfo_status_t HFOEnvironment::act(Action action) {
+  hfo_status_t game_status;
+  // Send the action
  if (send(sockfd, &action, sizeof(Action), 0) < 0) {
    error("[Agent Client] ERROR sending from socket");
  }
-  return 0.;
+  // Get the game status
+  if (recv(sockfd, &game_status, sizeof(hfo_status_t), 0) < 0) {
+    error("[Agent Client] ERROR recieving from socket");
+  }
+  return game_status;
 }
--- a/src/agent.cpp
+++ b/src/agent.cpp
@@ -110,6 +110,8 @@ Agent::Agent()
      M_field_evaluator(createFieldEvaluator()),
      M_action_generator(createActionGenerator()),
      numTeammates(-1), numOpponents(-1), numFeatures(-1),
+      lastTrainerMessageTime(-1),
+      episode_start(true),
      server_running(false)
 {
    boost::shared_ptr< AudioMemory > audio_memory( new AudioMemory );
@@ -173,6 +175,8 @@ bool Agent::initImpl(CmdLineParser & cmd_parser) {
    rcsc::ParamMap my_params("Additional options");
    my_params.add()("numTeammates", "", &numTeammates, "number of teammates");
    my_params.add()("numOpponents", "", &numOpponents, "number of opponents");
+    my_params.add()("playingOffense", "", &playingOffense,
+                    "are we playing offense or defense");

    cmd_parser.parse(my_params);
    if (cmd_parser.count("help") > 0) {
@@ -426,6 +430,27 @@ void Agent::clientHandshake() {
  std::cout << "[Agent Server] Handshake complete" << std::endl;
 }

+hfo_status_t Agent::getGameStatus() {
+  hfo_status_t game_status = IN_GAME;
+  if (audioSensor().trainerMessageTime().cycle() > lastTrainerMessageTime) {
+    lastTrainerMessageTime = audioSensor().trainerMessageTime().cycle();
+    const std::string& message = audioSensor().trainerMessage();
+    if (message.compare("GOAL") == 0) {
+      game_status = GOAL;
+    } else if (message.compare("CAPTURED_BY_DEFENSE") == 0) {
+      game_status = CAPTURED_BY_DEFENSE;
+    } else if (message.compare("OUT_OF_BOUNDS") == 0) {
+      game_status = OUT_OF_BOUNDS;
+    } else if (message.compare("OUT_OF_TIME") == 0) {
+      game_status = OUT_OF_TIME;
+    } else {
+      std::cout << "[Agent Server] Unrecognized Trainer Message: " << message
+                << std::endl;
+    }
+  }
+  return game_status;
+}
+
 /*!
  main decision
  virtual method in super class
@@ -436,10 +461,14 @@ void Agent::actionImpl() {
    clientHandshake();
  }

-  // Update the state features
-  updateStateFeatures();
+  // Update and send the game status
+  hfo_status_t game_status = getGameStatus();
+  if (send(newsockfd, &game_status, sizeof(int), 0) < 0) {
+    error("[Agent Server] ERROR sending from socket");
+  }

-  // Send the state features
+  // Update and send the state features
+  updateStateFeatures();
  if (send(newsockfd, &(feature_vec.front()),
           numFeatures * sizeof(float), 0) < 0) {
    error("[Agent Server] ERROR sending state features from socket");
@@ -463,14 +492,15 @@ void Agent::actionImpl() {
    case KICK:
      this->doKick(action.arg1, action.arg2);
      break;
+    case QUIT:
+      std::cout << "[Agent Server] Got quit from agent." << std::endl;
+      exit(0);
    default:
      std::cerr << "[Agent Server] ERROR Unsupported Action: "
                << action.action << std::endl;
      exit(1);
  }

-  // TODO: How to get rewards?
-
  // For now let's not worry about turning the neck or setting the vision.
  this->setViewAction(new View_Tactical());
  this->setNeckAction(new Neck_TurnToBallOrScan());

--- a/src/agent.h
+++ b/src/agent.h
@@ -40,7 +40,18 @@ enum action_t
  DASH,   // Dash(power, relative_direction)
  TURN,   // Turn(direction)
  TACKLE, // Tackle(direction)
-  KICK    // Kick(power, direction)
+  KICK,   // Kick(power, direction)
+  QUIT    // Special action to quit the game
+};
+
+// The current status of the HFO game
+enum hfo_status_t
+{
+  IN_GAME,
+  GOAL,
+  CAPTURED_BY_DEFENSE,
+  OUT_OF_BOUNDS,
+  OUT_OF_TIME
 };

 struct Action {
@@ -76,12 +87,16 @@ protected:
  // Updated the state features stored in feature_vec
  void updateStateFeatures();

+  // Get the current game status
+  hfo_status_t getGameStatus();
+
  // Add the angle and distance to the landmark to the feature_vec
  void addLandmarkFeature(const rcsc::Vector2D& landmark,
                          const rcsc::Vector2D& self_pos);

  int numTeammates;
  int numOpponents;
+  bool playingOffense; // Are we playing offense or defense?
  int numFeatures; // Total number of features
  // Number of features for non-player objects. Clearly this is the answer.
  const static int num_basic_features = 42;
@@ -90,6 +105,8 @@ protected:
  std::vector<float> feature_vec; // Contains the current features
  int featIndx; // Feature being populated
  const static int server_port = 6008;
+  long lastTrainerMessageTime; // Last time the trainer sent a message
+  bool episode_start; // True only in the timestep that the game is starting

  // Start the server and listen for a connection.
  virtual void startServer();