Added python interface to HFO domain.

774fc85b · Matthew Hausknecht · dcf7f971 · 774fc85b · 774fc85b · 774fc85b
Commit 774fc85b authored Mar 06, 2015 by Matthew Hausknecht
Showing with 258 additions and 34 deletions

CMakeLists.txt CMakeLists.txt +4 -0

HFO.py HFO.py +91 -0

bin/Trainer.py bin/Trainer.py +18 -18

bin/start.py bin/start.py +6 -4

src/agent.cpp src/agent.cpp +128 -11

src/agent.h src/agent.h +11 -1

No files found.
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
 cmake_minimum_required(VERSION 2.8.3)
+find_package(Threads REQUIRED)
+# find_package(XMLRPC REQUIRED c++)
 project(hfo)
 # Change these to reflect the location of your librcsc-4.1.0 install
@@ -12,6 +14,7 @@ include_directories(
  ${SOURCE_DIR}
  ${SOURCE_DIR}/chain_action
  ${LIBRCSC_INCLUDE}
+  # ${XMLRPC_INCLUDE_DIRS}
 )
 link_directories(
@@ -30,6 +33,7 @@ list(APPEND LINK_LIBS
  rcsc_gz
  rcsc_time
  rcsc_rcg
+  ${CMAKE_THREAD_LIBS_INIT}
 )
 add_executable(sample_coach ${SOURCE_DIR}/main_coach.cpp ${SOURCE_DIR}/sample_coach.cpp ${SOURCES})

--- a/HFO.py
+++ b/HFO.py
+import socket, struct, thread, time
+class HFOEnvironment(object):
+  '''The HFOEnvironment is designed to be the single point of contact
+  between a learning agent and the Half-Field-Offense domain.
+  '''
+  def __init__(self):
+    self.socket = None # Socket connection to server
+    self.numFeatures = None # Given by the server in handshake
+    self.trainerThreadID = None # Thread of the trainer process
+    self.actions = ['DASH', 'TURN', 'TACKLE', 'KICK']
+  def startDomain(self, args=[]):
+    '''Covenience method to start the HFO domain by calling the
+    /bin/start.py script and providing it kwargs. Call this method
+    before connectToAgentServer.
+    args: a list of argument strings passed to the start script.
+    (e.g. ['--offense','3']). See ./bin/start.py -h for all args.
+    '''
+    # This method calls the trainer in bin directory
+    def runTrainer():
+      from bin import start
+      start.main(start.parseArgs(args))
+    self.trainerThreadID = thread.start_new_thread(runTrainer,())
+    time.sleep(2)
+  def connectToAgentServer(self, server_port=6008):
+    '''Connect to the server that controls the agent on the specified port. '''
+    self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    print '[Agent Client] Connecting to Agent Server on port', server_port
+    while True:
+      try:
+        self.socket.connect(('localhost', server_port))
+      except:
+        time.sleep(1)
+        continue
+      else:
+        break
+    print '[Agent Client] Connected', server_port
+    self.handshakeAgentServer()
+  def handshakeAgentServer(self):
+    '''Handshake with the agent's server. Returns the number of state
+    features in the domain. '''
+    # Recieve float 123.2345
+    data = self.socket.recv(struct.calcsize("f"))
+    f = struct.unpack("f", data)[0]
+    assert abs(f - 123.2345) < 1e-4, "Float handshake failed"
+    # Send float 5432.321
+    self.socket.send(struct.pack("f", 5432.321))
+    # Recieve the number of features
+    data = self.socket.recv(struct.calcsize("i"))
+    self.numFeatures = struct.unpack("i", data)[0]
+    # Send what we recieved
+    self.socket.send(struct.pack("i", self.numFeatures))
+    print '[Agent Client] Handshake complete'
+  def getState(self):
+    '''Get the current state of the world. Returns a list of floats with
+    size numFeatures. '''
+    data = self.socket.recv(struct.calcsize('f')*self.numFeatures)
+    if not data:
+      print '[Agent Client] ERROR Recieved bad data from Server. Perhaps server closed?'
+      self.cleanup()
+      exit(1)
+    features = struct.unpack('f'*self.numFeatures, data)
+    return features
+  def act(self, action_number):
+    ''' Send an action and recieve the resulting reward from the environment.'''
+    self.socket.send(struct.pack("i", action_number))
+    return 0
+  def cleanup(self):
+    ''' Close the connection to the agent's server. '''
+    self.socket.close()
+    if self.trainerThreadID is not None:
+      thread.interrupt_main()
+if __name__ == '__main__':
+  hfo = HFOEnvironment()
+  trainer_args = '--offense 1 --defense 0 --headless'.split(' ')
+  hfo.startDomain(trainer_args)
+  hfo.connectToAgentServer()
+  while True:
+    features = hfo.getState()
+    reward = hfo.act(0)
+  hfo.cleanup()
--- a/bin/Trainer.py
+++ b/bin/Trainer.py
@@ -100,7 +100,7 @@ class Trainer(object):
               %(self._agentTeam, self._agentNumExt, numTeammates, numOpponents)
    agentCmd = agentCmd.split(' ')
    # Ignore stderr because librcsc continually prints to it
-    kwargs = {'stderr':open('/dev/null','w')}
+    kwargs = {}#'stderr':open('/dev/null','w')}
    p = subprocess.Popen(agentCmd, **kwargs)
    p.wait()
    with open('/tmp/start%i' % p.pid,'r') as f:
@@ -480,23 +480,23 @@ class Trainer(object):
  def getOffensiveResetPosition(self):
    """ Returns a random position for an offensive player. """
-    # offsets = [
+    offsets = [
-    #   [-1,-1],
+      [-1,-1],
-    #   [-1,1],
+      [-1,1],
-    #   [1,1],
+      [1,1],
-    #   [1,-1],
+      [1,-1],
-    #   [0,2],
+      [0,2],
-    #   [0,-2],
+      [0,-2],
-    #   [-2,-2],
+      [-2,-2],
-    #   [-2,2],
+      [-2,2],
-    #   [2,2],
+      [2,2],
-    #   [2,-2],
+      [2,-2],
-    # ]
+    ]
-    # offset = offsets[self._rng.randint(len(offsets))]
+    offset = offsets[self._rng.randint(len(offsets))]
-    # offset_from_ball = 0.1 * self.PITCH_LENGTH * self._rng.rand(2) + \
+    offset_from_ball = 0.1 * self.PITCH_LENGTH * self._rng.rand(2) + \
-    #                    0.1 * self.PITCH_LENGTH * numpy.array(offset)
+                       0.1 * self.PITCH_LENGTH * numpy.array(offset)
-    # return self.boundPoint(self._ballPosition + offset_from_ball)
+    return self.boundPoint(self._ballPosition + offset_from_ball)
-    return self._ballPosition
+    # return self._ballPosition
  def getDefensiveResetPosition(self):
    """ Returns a random position for a defensive player. """

--- a/bin/start.py
+++ b/bin/start.py
@@ -37,7 +37,7 @@ def launch(cmd, necessary=True, supressOutput=True, name='Unknown'):
    necProcesses.append([p,name])
  return p
-def main(team1, team2, rng, args):
+def main(args, team1='left', team2='right', rng=numpy.random.RandomState()):
  """Sets up the teams, launches the server and monitor, starts the
  trainer.
  """
@@ -81,7 +81,7 @@ def main(team1, team2, rng, args):
        pass
    time.sleep(0.1)
-if __name__ == '__main__':
+def parseArgs(args=None):
  import argparse
  p = argparse.ArgumentParser(description='Start Half Field Offense.')
  p.add_argument('--headless', dest='headless', action='store_true',
@@ -101,5 +101,7 @@ if __name__ == '__main__':
                 help='Don\'t use a learning agent.')
  p.add_argument('--no-sync', dest='sync', action='store_false', default=True,
                 help='Run server in non-sync mode')
-  args = p.parse_args()
+  return p.parse_args(args=args)
-  main(team1='left', team2='right', rng=numpy.random.RandomState(), args=args)
+if __name__ == '__main__':
+  main(parseArgs())
--- a/src/agent.cpp
+++ b/src/agent.cpp
@@ -84,11 +84,22 @@
 #include <sstream>
 #include <string>
 #include <cstdlib>
+#include <stdio.h>
-#include <boost/interprocess/managed_shared_memory.hpp>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
 using namespace rcsc;
+// Socket Error
+void error(const char *msg)
+{
+    perror(msg);
+    exit(1);
+}
 #define ADD_FEATURE(val) \
  assert(featIndx < numFeatures); \
  feature_vec[featIndx++] = val;
@@ -98,7 +109,8 @@ Agent::Agent()
      M_communication(),
      M_field_evaluator(createFieldEvaluator()),
      M_action_generator(createActionGenerator()),
-      numTeammates(-1), numOpponents(-1), numFeatures(-1)
+      numTeammates(-1), numOpponents(-1), numFeatures(-1),
+      server_running(false)
 {
    boost::shared_ptr< AudioMemory > audio_memory( new AudioMemory );
@@ -146,6 +158,12 @@ Agent::Agent()
    M_communication = Communication::Ptr(new SampleCommunication());
 }
+// Shut down the agent server. The descriptors are only closed when
+// startServer() completed: before that, sockfd/newsockfd hold no valid
+// value (the visible constructor initializer list does not set them), and
+// closing an arbitrary integer could close an unrelated descriptor.
+Agent::~Agent() {
+  std::cout << "[Agent Server] Closing Server." << std::endl;
+  if (server_running) {
+    close(newsockfd);
+    close(sockfd);
+    server_running = false;
+  }
+}
 bool Agent::initImpl(CmdLineParser & cmd_parser) {
    bool result = PlayerAgent::initImpl(cmd_parser);
@@ -353,15 +371,108 @@ void Agent::addLandmarkFeature(const rcsc::Vector2D& landmark,
  ADD_FEATURE(vec_to_landmark.r());
 }
-/*-------------------------------------------------------------------*/
+// Open a TCP listening socket on server_port (all local interfaces) and
+// block until one client has connected. On return newsockfd is the
+// connected client socket and server_running is true. Every failure is
+// fatal and reported through error().
+void Agent::startServer() {
+  std::cout << "Starting Server on Port " << server_port << std::endl;
+  struct sockaddr_in serv_addr, cli_addr;
+  sockfd = socket(AF_INET, SOCK_STREAM, 0);
+  if (sockfd < 0) {
+    error("[Agent Server] ERROR opening socket");
+  }
+  // Allow the port to be bound again right after a previous agent exited,
+  // while its old connection may still be in TIME_WAIT.
+  int reuse = 1;
+  if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)) < 0) {
+    error("[Agent Server] ERROR setting SO_REUSEADDR");
+  }
+  bzero((char *) &serv_addr, sizeof(serv_addr));
+  serv_addr.sin_family = AF_INET;
+  serv_addr.sin_addr.s_addr = INADDR_ANY;
+  serv_addr.sin_port = htons(server_port);
+  if (bind(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0) {
+    error("[Agent Server] ERROR on binding");
+  }
+  // A failed listen() would otherwise only surface later as a confusing
+  // accept() error.
+  if (listen(sockfd, 5) < 0) {
+    error("[Agent Server] ERROR on listen");
+  }
+  socklen_t clilen = sizeof(cli_addr);
+  std::cout << "[Agent Server] Waiting for client to connect... " << std::endl;
+  newsockfd = accept(sockfd, (struct sockaddr *) &cli_addr, &clilen);
+  if (newsockfd < 0) {
+    error("[Agent Server] ERROR on accept");
+  }
+  std::cout << "[Agent Server] Connected" << std::endl;
+  server_running = true;
+}
+// Verify that the connected client exchanges binary data compatibly:
+//   1. send the float 123.2345, expect the float 5432.321 back;
+//   2. send numFeatures, expect the client to echo the same int.
+// Any transmission failure, short read, closed connection or mismatch is
+// fatal and reported through error().
+void Agent::clientHandshake() {
+  // Send float 123.2345
+  float f = 123.2345f;
+  if (send(newsockfd, &f, sizeof(float), 0) < 0) {
+    error("[Agent Server] ERROR sending from socket");
+  }
+  // Receive float 5432.321. MSG_WAITALL plus the size comparison also
+  // rejects a closed connection (0 bytes) and a short read.
+  if (recv(newsockfd, &f, sizeof(float), MSG_WAITALL) !=
+      (ssize_t) sizeof(float)) {
+    error("[Agent Server] ERROR recv from socket");
+  }
+  // Check that error is within bounds. The difference is computed in
+  // float and negated by hand: an unqualified abs() may pick the integer
+  // overload and truncate the difference to 0. The negated comparison
+  // makes a NaN reply fail as well.
+  float diff = f - 5432.321f;
+  if (diff < 0) {
+    diff = -diff;
+  }
+  if (!(diff <= 1e-4f)) {
+    error("[Agent Server] Handshake failed. Improper float recieved.");
+  }
+  // Send the number of features
+  assert(numFeatures > 0);
+  if (send(newsockfd, &numFeatures, sizeof(int), 0) < 0) {
+    error("[Agent Server] ERROR sending from socket");
+  }
+  // Check that client has received correctly
+  int client_response = -1;
+  if (recv(newsockfd, &client_response, sizeof(int), MSG_WAITALL) !=
+      (ssize_t) sizeof(int)) {
+    error("[Agent Server] ERROR recv from socket");
+  }
+  if (client_response != numFeatures) {
+    error("[Agent Server] Client incorrectly parsed the number of features.");
+  }
+  std::cout << "[Agent Server] Handshake complete" << std::endl;
+}
 /*!
  main decision
  virtual method in super class
 */
 void Agent::actionImpl() {
+  if (!server_running) {
+    startServer();
+    clientHandshake();
+  }
+  // Update the state features
  updateStateFeatures();
-  // Do decision making here
+  // Send the state features
+  if (send(newsockfd, &(feature_vec.front()),
+           numFeatures * sizeof(float), 0) < 0) {
+    error("[Agent Server] ERROR sending state features from socket");
+  }
+  // Get the action
+  action_t action;
+  if (recv(newsockfd, &action, sizeof(int), 0) < 0) {
+    error("[Agent Server] ERROR recv from socket");
+  }
+  switch(action) {
+    case DASH:
+      this->doDash(100., 0);
+      break;
+    case TURN:
+      this->doTurn(10);
+      break;
+    case TACKLE:
+      this->doTackle(0, false);
+      break;
+    case KICK:
+      this->doKick(100., 0);
+      break;
+    default:
+      error("[Agent Server] Unsupported Action!");
+  }
+  // char buffer[256];
+  // bzero(buffer,256);
+  // if (read(newsockfd,buffer,255) < 0) {
+  //   error("[Agent Server] ERROR reading from socket");
+  // }
+  // printf("Here is the message: %s\n",buffer);
  // TODO: How to get rewards?
@@ -369,6 +480,12 @@ void Agent::actionImpl() {
  this->setViewAction(new View_Tactical());
  this->setNeckAction(new Neck_TurnToBallOrScan());
+  // ======================== Actions ======================== //
+  // 0: Dash(power, relative_direction)
+  // 1: Turn(direction)
+  // 2: Tackle(direction)
+  // 3: Kick(power, direction)
  // Dash with power [-100,100]. Negative values move backwards. The
  // relative_dir [-180,180] is the direction to dash in. This should
  // be set every step.
@@ -416,14 +533,14 @@ void Agent::actionImpl() {
  // Dribble is omitted because it consists of dashes, turns, and kicks
  // sleep(1);
-  static int i=0;
+  // static int i=0;
-  i++;
+  // i++;
-  if (i % 2 == 0) {
+  // if (i % 2 == 0) {
-    this->doDash(10., 0);
+  //   this->doDash(10., 0);
-  } else {
+  // } else {
    // this->doKick(2., 0);
    // this->doTurn(5);
-  }
+  // }
 }
 /*-------------------------------------------------------------------*/

--- a/src/agent.h
+++ b/src/agent.h
@@ -37,9 +37,11 @@
 class Agent : public rcsc::PlayerAgent {
 public:
  Agent();
-  virtual ~Agent() {};
+  virtual ~Agent();
  virtual FieldEvaluator::ConstPtr getFieldEvaluator() const;
+  enum action_t { DASH, TURN, TACKLE, KICK };
 protected:
  // You can override this method. But you must call
  // PlayerAgent::initImpl() in this method.
@@ -74,6 +76,12 @@ protected:
  const static int features_per_player = 5;
  std::vector<float> feature_vec; // Contains the current features
  int featIndx; // Feature being populated
+  const static int server_port = 6008;
+  // Start the server and listen for a connection.
+  virtual void startServer();
+  // Transmit information to the client and ensure it can receive.
+  virtual void clientHandshake();
 private:
  bool doPreprocess();
@@ -84,6 +92,8 @@ protected:
  Communication::Ptr M_communication;
  FieldEvaluator::ConstPtr M_field_evaluator;
  ActionGenerator::ConstPtr M_action_generator;
+  bool server_running; // Is the server running?
+  int sockfd, newsockfd; // Server sockets
 };
 #endif