Commit 774fc85b authored by Matthew Hausknecht

Added python interface to HFO domain.

parent dcf7f971
cmake_minimum_required(VERSION 2.8.3)
find_package(Threads REQUIRED)
# find_package(XMLRPC REQUIRED c++)
project(hfo)
# Change these to reflect the location of your librcsc-4.1.0 install
......@@ -12,6 +14,7 @@ include_directories(
${SOURCE_DIR}
${SOURCE_DIR}/chain_action
${LIBRCSC_INCLUDE}
# ${XMLRPC_INCLUDE_DIRS}
)
link_directories(
......@@ -30,6 +33,7 @@ list(APPEND LINK_LIBS
rcsc_gz
rcsc_time
rcsc_rcg
${CMAKE_THREAD_LIBS_INIT}
)
add_executable(sample_coach ${SOURCE_DIR}/main_coach.cpp ${SOURCE_DIR}/sample_coach.cpp ${SOURCES})
......
import socket, struct, thread, time
class HFOEnvironment(object):
    '''The HFOEnvironment is designed to be the single point of contact
    between a learning agent and the Half-Field-Offense domain.

    Typical use: startDomain() (optional), connectToAgentServer(), then
    alternate getState() / act() and finally cleanup().
    '''

    def __init__(self):
        self.socket = None           # Socket connection to server
        self.numFeatures = None      # Given by the server in handshake
        self.trainerThreadID = None  # Thread of the trainer process
        self.actions = ['DASH', 'TURN', 'TACKLE', 'KICK']

    def startDomain(self, args=None):
        '''Convenience method to start the HFO domain by calling the
        /bin/start.py script and providing it args. Call this method
        before connectToAgentServer.

        args: a list of argument strings passed to the start script
              (e.g. ['--offense','3']). See ./bin/start.py -h for all args.
              None (the default) means "no arguments".
        '''
        # Copy so the trainer thread never shares a list with the caller.
        trainer_args = [] if args is None else list(args)

        # This function calls the trainer in the bin directory.
        def runTrainer():
            from bin import start
            start.main(start.parseArgs(trainer_args))

        self.trainerThreadID = thread.start_new_thread(runTrainer, ())
        # Give the trainer a moment to come up before the caller connects.
        time.sleep(2)

    def connectToAgentServer(self, server_port=6008):
        '''Connect to the server that controls the agent on the specified
        port, retrying once per second until the connection succeeds, then
        perform the handshake.
        '''
        print('[Agent Client] Connecting to Agent Server on port %s'
              % (server_port,))
        while True:
            # Use a fresh socket per attempt: a socket whose connect()
            # failed is not reliably reusable on every platform.
            self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                self.socket.connect(('localhost', server_port))
            except socket.error:
                # Only connection errors are retried; KeyboardInterrupt and
                # SystemExit propagate so the loop can be interrupted.
                self.socket.close()
                time.sleep(1)
            else:
                break
        print('[Agent Client] Connected %s' % (server_port,))
        self.handshakeAgentServer()

    def _recvExact(self, num_bytes):
        '''Read from the socket until num_bytes have arrived.

        recv() may legally return fewer bytes than requested, so keep
        reading. Returns the bytes received; the result is shorter than
        num_bytes only if the peer closed the connection first.
        '''
        chunks = []
        remaining = num_bytes
        while remaining > 0:
            chunk = self.socket.recv(remaining)
            if not chunk:
                break  # Peer closed the connection
            chunks.append(chunk)
            remaining -= len(chunk)
        return b''.join(chunks)

    def handshakeAgentServer(self):
        '''Handshake with the agent's server and store the number of state
        features in self.numFeatures.

        Raises AssertionError if the float check fails and struct.error if
        the server closes the connection mid-handshake.
        '''
        # Receive float 123.2345
        data = self._recvExact(struct.calcsize("f"))
        f = struct.unpack("f", data)[0]
        assert abs(f - 123.2345) < 1e-4, "Float handshake failed"
        # Send float 5432.321
        self.socket.sendall(struct.pack("f", 5432.321))
        # Receive the number of features
        data = self._recvExact(struct.calcsize("i"))
        self.numFeatures = struct.unpack("i", data)[0]
        # Send what we received
        self.socket.sendall(struct.pack("i", self.numFeatures))
        print('[Agent Client] Handshake complete')

    def getState(self):
        '''Get the current state of the world. Returns a tuple of floats
        with size numFeatures.

        If the server closes the connection before a full state arrives,
        the connection is cleaned up and the process exits with status 1.
        '''
        num_bytes = struct.calcsize('f') * self.numFeatures
        data = self._recvExact(num_bytes)
        if len(data) != num_bytes:
            print('[Agent Client] ERROR Recieved bad data from Server. Perhaps server closed?')
            self.cleanup()
            raise SystemExit(1)
        features = struct.unpack('f' * self.numFeatures, data)
        return features

    def act(self, action_number):
        '''Send an action and receive the resulting reward from the
        environment. The reward is not implemented yet: always returns 0.
        '''
        self.socket.sendall(struct.pack("i", action_number))
        return 0

    def cleanup(self):
        '''Close the connection to the agent's server and, if this object
        started the trainer, interrupt the main thread.
        '''
        # Safe to call even if connectToAgentServer was never reached.
        if self.socket is not None:
            self.socket.close()
        if self.trainerThreadID is not None:
            thread.interrupt_main()
if __name__ == '__main__':
    # Demo: launch a headless 1-vs-0 game and keep sending action 0 (DASH).
    env = HFOEnvironment()
    env.startDomain('--offense 1 --defense 0 --headless'.split(' '))
    env.connectToAgentServer()
    while True:
        state = env.getState()
        reward = env.act(0)
    # NOTE(review): not reached as written -- the loop above has no exit;
    # getState() itself calls cleanup() when the server goes away.
    env.cleanup()
......@@ -100,7 +100,7 @@ class Trainer(object):
%(self._agentTeam, self._agentNumExt, numTeammates, numOpponents)
agentCmd = agentCmd.split(' ')
# Ignore stderr because librcsc continually prints to it
kwargs = {'stderr':open('/dev/null','w')}
kwargs = {}#'stderr':open('/dev/null','w')}
p = subprocess.Popen(agentCmd, **kwargs)
p.wait()
with open('/tmp/start%i' % p.pid,'r') as f:
......@@ -480,23 +480,23 @@ class Trainer(object):
def getOffensiveResetPosition(self):
    """ Returns a random position for an offensive player.

    One of ten fixed grid offsets around the ball is drawn from
    self._rng, scaled by a tenth of the pitch length, jittered by up to a
    further tenth of the pitch length per axis, and the resulting point
    is passed through self.boundPoint before being returned.
    """
    # Coarse offsets from the ball, in units of 0.1 * PITCH_LENGTH.
    offsets = [
        [-1, -1],
        [-1, 1],
        [1, 1],
        [1, -1],
        [0, 2],
        [0, -2],
        [-2, -2],
        [-2, 2],
        [2, 2],
        [2, -2],
    ]
    # Draw order (randint, then rand) is kept so seeded runs are reproducible.
    offset = offsets[self._rng.randint(len(offsets))]
    offset_from_ball = 0.1 * self.PITCH_LENGTH * self._rng.rand(2) + \
        0.1 * self.PITCH_LENGTH * numpy.array(offset)
    return self.boundPoint(self._ballPosition + offset_from_ball)
def getDefensiveResetPosition(self):
""" Returns a random position for a defensive player. """
......
......@@ -37,7 +37,7 @@ def launch(cmd, necessary=True, supressOutput=True, name='Unknown'):
necProcesses.append([p,name])
return p
def main(team1, team2, rng, args):
def main(args, team1='left', team2='right', rng=numpy.random.RandomState()):
"""Sets up the teams, launches the server and monitor, starts the
trainer.
"""
......@@ -81,7 +81,7 @@ def main(team1, team2, rng, args):
pass
time.sleep(0.1)
if __name__ == '__main__':
def parseArgs(args=None):
import argparse
p = argparse.ArgumentParser(description='Start Half Field Offense.')
p.add_argument('--headless', dest='headless', action='store_true',
......@@ -101,5 +101,7 @@ if __name__ == '__main__':
help='Don\'t use a learning agent.')
p.add_argument('--no-sync', dest='sync', action='store_false', default=True,
help='Run server in non-sync mode')
args = p.parse_args()
main(team1='left', team2='right', rng=numpy.random.RandomState(), args=args)
return p.parse_args(args=args)
if __name__ == '__main__':
    # Parse the command line (sys.argv) and hand the result to main().
    cli_args = parseArgs()
    main(cli_args)
......@@ -84,11 +84,22 @@
#include <sstream>
#include <string>
#include <cstdlib>
#include <boost/interprocess/managed_shared_memory.hpp>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
using namespace rcsc;
// Fatal socket error: print msg followed by the description of the
// current errno value to stderr, then terminate with exit status 1.
void error(const char *msg)
{
  ::perror(msg);
  ::exit(1);
}
#define ADD_FEATURE(val) \
assert(featIndx < numFeatures); \
feature_vec[featIndx++] = val;
......@@ -98,7 +109,8 @@ Agent::Agent()
M_communication(),
M_field_evaluator(createFieldEvaluator()),
M_action_generator(createActionGenerator()),
numTeammates(-1), numOpponents(-1), numFeatures(-1)
numTeammates(-1), numOpponents(-1), numFeatures(-1),
server_running(false)
{
boost::shared_ptr< AudioMemory > audio_memory( new AudioMemory );
......@@ -146,6 +158,12 @@ Agent::Agent()
M_communication = Communication::Ptr(new SampleCommunication());
}
// Shut down the agent server. sockfd/newsockfd are assigned only inside
// startServer(), and server_running becomes true only after both are valid.
// NOTE(review): the visible constructor initializer list does not set the
// descriptors, so they are closed only when the server actually ran --
// otherwise close() could be handed garbage values.
Agent::~Agent() {
  std::cout << "[Agent Server] Closing Server." << std::endl;
  if (server_running) {
    close(newsockfd);
    close(sockfd);
  }
}
bool Agent::initImpl(CmdLineParser & cmd_parser) {
bool result = PlayerAgent::initImpl(cmd_parser);
......@@ -353,15 +371,108 @@ void Agent::addLandmarkFeature(const rcsc::Vector2D& landmark,
ADD_FEATURE(vec_to_landmark.r());
}
/*-------------------------------------------------------------------*/
void Agent::startServer() {
std::cout << "Starting Server on Port " << server_port << std::endl;
struct sockaddr_in serv_addr, cli_addr;
sockfd = socket(AF_INET, SOCK_STREAM, 0);
if (sockfd < 0) {
error("[Agent Server] ERROR opening socket");
}
bzero((char *) &serv_addr, sizeof(serv_addr));
serv_addr.sin_family = AF_INET;
serv_addr.sin_addr.s_addr = INADDR_ANY;
serv_addr.sin_port = htons(server_port);
if (bind(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0) {
error("[Agent Server] ERROR on binding");
}
listen(sockfd, 5);
socklen_t clilen = sizeof(cli_addr);
std::cout << "[Agent Server] Waiting for client to connect... " << std::endl;
newsockfd = accept(sockfd, (struct sockaddr *) &cli_addr, &clilen);
if (newsockfd < 0) {
error("[Agent Server] ERROR on accept");
}
std::cout << "[Agent Server] Connected" << std::endl;
server_running = true;
}
// Abort the agent for a handshake failure that does not set errno
// (perror() would append an unrelated errno description).
static void handshakeFailure(const char *msg) {
  std::cerr << msg << std::endl;
  exit(1);
}

// Receive exactly len bytes into buf. MSG_WAITALL asks recv() to block
// until the full amount has arrived; a short result therefore means the
// peer closed the connection (or the call was cut short).
static void handshakeRecv(int fd, void *buf, size_t len) {
  const ssize_t got = recv(fd, buf, len, MSG_WAITALL);
  if (got < 0) {
    error("[Agent Server] ERROR recv from socket");
  }
  if (static_cast<size_t>(got) != len) {
    handshakeFailure("[Agent Server] ERROR connection closed during handshake");
  }
}

/*!
  Verify that the connected client speaks the same binary protocol:
  exchange two known floats, then send numFeatures and require the
  client to echo it back. Any mismatch or socket failure terminates
  the process.
*/
void Agent::clientHandshake() {
  // Send float 123.2345
  float f = 123.2345f;
  if (send(newsockfd, &f, sizeof(float), 0) < 0) {
    error("[Agent Server] ERROR sending from socket");
  }
  // Receive float 5432.321
  handshakeRecv(newsockfd, &f, sizeof(float));
  // Check that error is within bounds. Compare against the *float*
  // literal: the nearest float to 5432.321 is roughly 2e-4 away from the
  // double value, so a double comparison with this tolerance would reject
  // a correct reply. The negated form also rejects NaN.
  const float diff = f - 5432.321f;
  if (!(diff <= 1e-4f && diff >= -1e-4f)) {
    handshakeFailure("[Agent Server] Handshake failed. Improper float recieved.");
  }
  // Send the number of features
  assert(numFeatures > 0);
  if (send(newsockfd, &numFeatures, sizeof(int), 0) < 0) {
    error("[Agent Server] ERROR sending from socket");
  }
  // Check that client has received correctly
  int client_response = -1;
  handshakeRecv(newsockfd, &client_response, sizeof(int));
  if (client_response != numFeatures) {
    handshakeFailure("[Agent Server] Client incorrectly parsed the number of features.");
  }
  std::cout << "[Agent Server] Handshake complete" << std::endl;
}
/*!
main decision
virtual method in super class
*/
void Agent::actionImpl() {
if (!server_running) {
startServer();
clientHandshake();
}
// Update the state features
updateStateFeatures();
// Do decision making here
// Send the state features
if (send(newsockfd, &(feature_vec.front()),
numFeatures * sizeof(float), 0) < 0) {
error("[Agent Server] ERROR sending state features from socket");
}
// Get the action
action_t action;
if (recv(newsockfd, &action, sizeof(int), 0) < 0) {
error("[Agent Server] ERROR recv from socket");
}
switch(action) {
case DASH:
this->doDash(100., 0);
break;
case TURN:
this->doTurn(10);
break;
case TACKLE:
this->doTackle(0, false);
break;
case KICK:
this->doKick(100., 0);
break;
default:
error("[Agent Server] Unsupported Action!");
}
// char buffer[256];
// bzero(buffer,256);
// if (read(newsockfd,buffer,255) < 0) {
// error("[Agent Server] ERROR reading from socket");
// }
// printf("Here is the message: %s\n",buffer);
// TODO: How to get rewards?
......@@ -369,6 +480,12 @@ void Agent::actionImpl() {
this->setViewAction(new View_Tactical());
this->setNeckAction(new Neck_TurnToBallOrScan());
// ======================== Actions ======================== //
// 0: Dash(power, relative_direction)
// 1: Turn(direction)
// 2: Tackle(direction)
// 3: Kick(power, direction)
// Dash with power [-100,100]. Negative values move backwards. The
// relative_dir [-180,180] is the direction to dash in. This should
// be set every step.
......@@ -416,14 +533,14 @@ void Agent::actionImpl() {
// Dribble is omitted because it consists of dashes, turns, and kicks
// sleep(1);
static int i=0;
i++;
if (i % 2 == 0) {
this->doDash(10., 0);
} else {
// static int i=0;
// i++;
// if (i % 2 == 0) {
// this->doDash(10., 0);
// } else {
// this->doKick(2., 0);
// this->doTurn(5);
}
// }
}
/*-------------------------------------------------------------------*/
......
......@@ -37,9 +37,11 @@
class Agent : public rcsc::PlayerAgent {
public:
Agent();
virtual ~Agent() {};
virtual ~Agent();
virtual FieldEvaluator::ConstPtr getFieldEvaluator() const;
enum action_t { DASH, TURN, TACKLE, KICK };
protected:
// You can override this method. But you must call
// PlayerAgent::initImpl() in this method.
......@@ -74,6 +76,12 @@ protected:
const static int features_per_player = 5;
std::vector<float> feature_vec; // Contains the current features
int featIndx; // Feature being populated
const static int server_port = 6008;
// Start the server and listen for a connection.
virtual void startServer();
// Transmit information to the client and ensure it can recieve.
virtual void clientHandshake();
private:
bool doPreprocess();
......@@ -84,6 +92,8 @@ protected:
Communication::Ptr M_communication;
FieldEvaluator::ConstPtr M_field_evaluator;
ActionGenerator::ConstPtr M_action_generator;
bool server_running; // Is the server running?
int sockfd, newsockfd; // Server sockets
};
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment