Commit 7a4eddf1 authored by Matthew Hausknecht

Polished the agent server interface.

parent 44277b38
import socket, struct, thread, time import socket, struct, thread, time
class Actions: class HFO_Actions:
''' An enum of the possible HFO actions ''' An enum of the possible HFO actions
Dash(power, relative_direction) Dash(power, relative_direction)
...@@ -9,7 +9,11 @@ class Actions: ...@@ -9,7 +9,11 @@ class Actions:
Kick(power, direction) Kick(power, direction)
''' '''
DASH, TURN, TACKLE, KICK = range(4) DASH, TURN, TACKLE, KICK, QUIT = range(5)
class HFO_Status:
''' Current status of the HFO game. '''
IN_GAME, GOAL, CAPTURED_BY_DEFENSE, OUT_OF_BOUNDS, OUT_OF_TIME = range(5)
class HFOEnvironment(object): class HFOEnvironment(object):
...@@ -17,11 +21,9 @@ class HFOEnvironment(object): ...@@ -17,11 +21,9 @@ class HFOEnvironment(object):
between a learning agent and the Half-Field-Offense domain. between a learning agent and the Half-Field-Offense domain.
''' '''
def __init__(self): def __init__(self):
self.socket = None # Socket connection to server self.socket = None # Socket connection to server
self.numFeatures = None # Given by the server in handshake self.numFeatures = None # Given by the server in handshake
self.actions = ['DASH', 'TURN', 'TACKLE', 'KICK']
def connectToAgentServer(self, server_port=6008): def connectToAgentServer(self, server_port=6008):
'''Connect to the server that controls the agent on the specified port. ''' '''Connect to the server that controls the agent on the specified port. '''
...@@ -51,6 +53,10 @@ class HFOEnvironment(object): ...@@ -51,6 +53,10 @@ class HFOEnvironment(object):
self.numFeatures = struct.unpack("i", data)[0] self.numFeatures = struct.unpack("i", data)[0]
# Send what we recieved # Send what we recieved
self.socket.send(struct.pack("i", self.numFeatures)) self.socket.send(struct.pack("i", self.numFeatures))
# Get the current game status
data = self.socket.recv(struct.calcsize("i"))
status = struct.unpack("i", data)[0]
assert status == HFO_Status.IN_GAME, "Status check failed"
print '[Agent Client] Handshake complete' print '[Agent Client] Handshake complete'
def getState(self): def getState(self):
...@@ -65,11 +71,14 @@ class HFOEnvironment(object): ...@@ -65,11 +71,14 @@ class HFOEnvironment(object):
return features return features
def act(self, action): def act(self, action):
''' Send an action and recieve the resulting reward from the environment.''' ''' Send an action and recieve the game status.'''
self.socket.send(struct.pack("iff", *action)) self.socket.send(struct.pack("iff", *action))
# TODO: Get the rewards from the domain # Get the current game status
return 0 data = self.socket.recv(struct.calcsize("i"))
status = struct.unpack("i", data)[0]
return status
def cleanup(self): def cleanup(self):
''' Close the connection to the agent's server. ''' ''' Send a quit and close the connection to the agent's server. '''
self.socket.send(struct.pack("i", HFO_Actions.QUIT))
self.socket.close() self.socket.close()
...@@ -26,3 +26,7 @@ By default if your agent takes longer then two seconds to select an action it wi ...@@ -26,3 +26,7 @@ By default if your agent takes longer then two seconds to select an action it wi
```bash ```bash
./bin/start.py ./bin/start.py
``` ```
and in a separate terminal
```bash
./examples/hfo_example_agent
```
...@@ -97,10 +97,12 @@ class Trainer(object): ...@@ -97,10 +97,12 @@ class Trainer(object):
self._agentNumExt = self.convertToExtPlayer(self._agentTeam, self._agentNumExt = self.convertToExtPlayer(self._agentTeam,
self._agentNumInt) self._agentNumInt)
agentCmd = 'start_agent.sh -t %s -u %i --numTeammates %i --numOpponents %i'\ agentCmd = 'start_agent.sh -t %s -u %i --numTeammates %i --numOpponents %i'\
%(self._agentTeam, self._agentNumExt, numTeammates, numOpponents) ' --playingOffense %i'\
%(self._agentTeam, self._agentNumExt, numTeammates, numOpponents,
self._agent_play_offense)
agentCmd = agentCmd.split(' ') agentCmd = agentCmd.split(' ')
# Ignore stderr because librcsc continually prints to it # Ignore stderr because librcsc continually prints to it
kwargs = {}#'stderr':open('/dev/null','w')} kwargs = {'stderr':open('/dev/null','w')}
p = subprocess.Popen(agentCmd, **kwargs) p = subprocess.Popen(agentCmd, **kwargs)
p.wait() p.wait()
with open('/tmp/start%i' % p.pid,'r') as f: with open('/tmp/start%i' % p.pid,'r') as f:
...@@ -470,7 +472,7 @@ class Trainer(object): ...@@ -470,7 +472,7 @@ class Trainer(object):
self.resetPlayerPositions() self.resetPlayerPositions()
self.send('(recover)') self.send('(recover)')
self.send('(change_mode play_on)') self.send('(change_mode play_on)')
self.send('(say RESET)') # self.send('(say RESET)')
def resetBallPosition(self): def resetBallPosition(self):
"""Reset the position of the ball for a new HFO trial. """ """Reset the position of the ball for a new HFO trial. """
...@@ -555,16 +557,20 @@ class Trainer(object): ...@@ -555,16 +557,20 @@ class Trainer(object):
if self.isGoal(): if self.isGoal():
self._numGoals += 1 self._numGoals += 1
result = 'Goal' result = 'Goal'
self.send('(say GOAL)')
elif self.isOOB(): elif self.isOOB():
self._numBallsOOB += 1 self._numBallsOOB += 1
result = 'Out of Bounds' result = 'Out of Bounds'
self.send('(say OUT_OF_BOUNDS)')
elif team_holding_ball not in [None,self._offenseTeamInd]: elif team_holding_ball not in [None,self._offenseTeamInd]:
self._numBallsCaptured += 1 self._numBallsCaptured += 1
result = 'Defense Captured' result = 'Defense Captured'
self.send('(say CAPTURED_BY_DEFENSE)')
elif self._frame - self._lastFrameBallTouched > self.UNTOUCHED_LENGTH: elif self._frame - self._lastFrameBallTouched > self.UNTOUCHED_LENGTH:
self._lastFrameBallTouched = self._frame self._lastFrameBallTouched = self._frame
self._numOutOfTime += 1 self._numOutOfTime += 1
result = 'Ball untouched for too long' result = 'Ball untouched for too long'
self.send('(say OUT_OF_TIME)')
else: else:
print '[Trainer] Error: Unable to detect reason for End of Trial!' print '[Trainer] Error: Unable to detect reason for End of Trial!'
sys.exit(1) sys.exit(1)
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
# encoding: utf-8 # encoding: utf-8
import subprocess, os, time, numpy, sys import subprocess, os, time, numpy, sys
from signal import SIGINT from signal import SIGKILL
# Global list of all/essential running processes # Global list of all/essential running processes
processes, necProcesses = [], [] processes, necProcesses = [], []
...@@ -72,14 +72,14 @@ def main(args, team1='left', team2='right', rng=numpy.random.RandomState()): ...@@ -72,14 +72,14 @@ def main(args, team1='left', team2='right', rng=numpy.random.RandomState()):
# Run HFO # Run HFO
trainer.run(necProcesses) trainer.run(necProcesses)
except KeyboardInterrupt: except KeyboardInterrupt:
print 'Exiting for CTRL-C' print '[start.py] Exiting for CTRL-C'
finally: finally:
for p in processes: for p in processes:
try: try:
p.send_signal(SIGINT) p.send_signal(SIGKILL)
except: except:
pass pass
time.sleep(0.1) time.sleep(0.1)
def parseArgs(args=None): def parseArgs(args=None):
import argparse import argparse
......
...@@ -76,6 +76,7 @@ usage() ...@@ -76,6 +76,7 @@ usage()
echo " --teammate STR name of teammates" echo " --teammate STR name of teammates"
echo " --numTeammates NUM number of teammates" echo " --numTeammates NUM number of teammates"
echo " --numOpponents NUM number of opponents" echo " --numOpponents NUM number of opponents"
echo " --playingOffense [0|1] are we playing offense or defense"
echo " --seed NUM seed for rng" echo " --seed NUM seed for rng"
echo " --gdb runs with gdb on (default:off)" echo " --gdb runs with gdb on (default:off)"
) 1>&2 ) 1>&2
...@@ -236,7 +237,7 @@ do ...@@ -236,7 +237,7 @@ do
opts="${opts} --teammate ${2}" opts="${opts} --teammate ${2}"
shift 1 shift 1
;; ;;
--numTeammates) --numTeammates)
if [ $# -lt 2 ]; then if [ $# -lt 2 ]; then
usage usage
...@@ -245,7 +246,7 @@ do ...@@ -245,7 +246,7 @@ do
opts="${opts} --numTeammates ${2}" opts="${opts} --numTeammates ${2}"
shift 1 shift 1
;; ;;
--numOpponents) --numOpponents)
if [ $# -lt 2 ]; then if [ $# -lt 2 ]; then
usage usage
...@@ -254,7 +255,16 @@ do ...@@ -254,7 +255,16 @@ do
opts="${opts} --numOpponents ${2}" opts="${opts} --numOpponents ${2}"
shift 1 shift 1
;; ;;
--playingOffense)
if [ $# -lt 2 ]; then
usage
exit 1
fi
opts="${opts} --playingOffense ${2}"
shift 1
;;
--seed) --seed)
if [ $# -lt 2 ]; then if [ $# -lt 2 ]; then
usage usage
......
...@@ -4,21 +4,42 @@ ...@@ -4,21 +4,42 @@
using namespace std; using namespace std;
// First Start the server by calling start.py in bin // First Start the server: $> bin/start.py
int main() { int main() {
// Create the HFO environment // Create the HFO environment
HFOEnvironment hfo; HFOEnvironment hfo;
// Connect the agent's server which should be listening if // Connect the agent's server
// ./bin/start.py was called.
hfo.connectToAgentServer(); hfo.connectToAgentServer();
// Continue until finished // Play 5 episodes
while (true) { for (int episode=0; episode<5; episode++) {
// Grab the vector of state features for the current state hfo_status_t status = IN_GAME;
const std::vector<float>& feature_vec = hfo.getState(); while (status == IN_GAME) {
// Create a dash action // Grab the vector of state features for the current state
Action a = {DASH, 100., 0.}; const std::vector<float>& feature_vec = hfo.getState();
// Perform the dash and recieve the reward // Create a dash action
float reward = hfo.act(a); Action a = {DASH, 100., 0.};
// Perform the dash and recieve the reward
status = hfo.act(a);
}
// Check what the outcome of the episode was
cout << "Episode " << episode << " ended with status: ";
switch (status) {
case GOAL:
cout << "goal" << endl;
break;
case CAPTURED_BY_DEFENSE:
cout << "captured by defense" << endl;
break;
case OUT_OF_BOUNDS:
cout << "out of bounds" << endl;
break;
case OUT_OF_TIME:
cout << "out of time" << endl;
break;
default:
cout << "Unknown status " << status << endl;
exit(1);
}
} }
}; };
...@@ -3,21 +3,42 @@ ...@@ -3,21 +3,42 @@
import imp import imp
# First Start the server by calling start.py in bin # First Start the server: $> bin/start.py
if __name__ == '__main__': if __name__ == '__main__':
# Load the HFO library # Load the HFO library
hfo_module = imp.load_source('HFO', '../HFO.py') try:
hfo_module = imp.load_source('HFO', '../HFO.py')
except:
hfo_module = imp.load_source('HFO', 'HFO.py')
# Get the possible actions # Get the possible actions
actions = hfo_module.Actions HFO_Actions = hfo_module.HFO_Actions
# Get the possible outcomes
HFO_Status = hfo_module.HFO_Status
# Create the HFO Environment # Create the HFO Environment
hfo = hfo_module.HFOEnvironment() hfo = hfo_module.HFOEnvironment()
# Connect to the agent server
hfo.connectToAgentServer() hfo.connectToAgentServer()
# Continue until finished # Play 5 episodes
while True: for episode in xrange(5):
# Grab the state features from the environment status = HFO_Status.IN_GAME
features = hfo.getState() while status == HFO_Status.IN_GAME:
# Take an action and get the reward # Grab the state features from the environment
reward = hfo.act((actions.KICK, 100, 12.3)) features = hfo.getState()
# Take an action and get the reward
status = hfo.act((HFO_Actions.KICK, 100, 12.3))
print 'Episode', episode, 'ended with',
# Check what the outcome of the episode was
if status == HFO_Status.GOAL:
print 'goal'
elif status == HFO_Status.CAPTURED_BY_DEFENSE:
print 'captured by defense'
elif status == HFO_Status.OUT_OF_BOUNDS:
print 'out of bounds'
elif status == HFO_Status.OUT_OF_TIME:
print 'out of time'
else:
print 'Unknown status', status
exit()
# Cleanup when finished # Cleanup when finished
hfo.cleanup() hfo.cleanup()
...@@ -23,9 +23,8 @@ class HFOEnvironment { ...@@ -23,9 +23,8 @@ class HFOEnvironment {
// TACKLE, // Tackle(direction) // TACKLE, // Tackle(direction)
// KICK // Kick(power, direction) // KICK // Kick(power, direction)
// }; // };
// Take an action and recieve the resulting game status
// Take an action and recieve the resulting reward. hfo_status_t act(Action action);
float act(Action action);
protected: protected:
int numFeatures; // The number of features in this domain int numFeatures; // The number of features in this domain
......
...@@ -16,6 +16,11 @@ void error(const char *msg) { ...@@ -16,6 +16,11 @@ void error(const char *msg) {
HFOEnvironment::HFOEnvironment() {} HFOEnvironment::HFOEnvironment() {}
HFOEnvironment::~HFOEnvironment() { HFOEnvironment::~HFOEnvironment() {
// Send a quit action and close the connection to the agent's server
action_t quit = QUIT;
if (send(sockfd, &quit, sizeof(int), 0) < 0) {
error("[Agent Client] ERROR sending from socket");
}
close(sockfd); close(sockfd);
} }
...@@ -69,6 +74,15 @@ void HFOEnvironment::handshakeAgentServer() { ...@@ -69,6 +74,15 @@ void HFOEnvironment::handshakeAgentServer() {
if (send(sockfd, &numFeatures, sizeof(int), 0) < 0) { if (send(sockfd, &numFeatures, sizeof(int), 0) < 0) {
error("[Agent Client] ERROR sending from socket"); error("[Agent Client] ERROR sending from socket");
} }
// Recieve the game status
hfo_status_t status;
if (recv(sockfd, &status, sizeof(hfo_status_t), 0) < 0) {
error("[Agent Client] ERROR recv from socket");
}
if (status != IN_GAME) {
std::cout << "[Agent Client] Handshake failed: status check." << std::endl;
exit(1);
}
std::cout << "[Agent Client] Handshake complete" << std::endl; std::cout << "[Agent Client] Handshake complete" << std::endl;
} }
...@@ -82,9 +96,15 @@ const std::vector<float>& HFOEnvironment::getState() { ...@@ -82,9 +96,15 @@ const std::vector<float>& HFOEnvironment::getState() {
return feature_vec; return feature_vec;
} }
float HFOEnvironment::act(Action action) { hfo_status_t HFOEnvironment::act(Action action) {
hfo_status_t game_status;
// Send the action
if (send(sockfd, &action, sizeof(Action), 0) < 0) { if (send(sockfd, &action, sizeof(Action), 0) < 0) {
error("[Agent Client] ERROR sending from socket"); error("[Agent Client] ERROR sending from socket");
} }
return 0.; // Get the game status
if (recv(sockfd, &game_status, sizeof(hfo_status_t), 0) < 0) {
error("[Agent Client] ERROR recieving from socket");
}
return game_status;
} }
...@@ -110,6 +110,8 @@ Agent::Agent() ...@@ -110,6 +110,8 @@ Agent::Agent()
M_field_evaluator(createFieldEvaluator()), M_field_evaluator(createFieldEvaluator()),
M_action_generator(createActionGenerator()), M_action_generator(createActionGenerator()),
numTeammates(-1), numOpponents(-1), numFeatures(-1), numTeammates(-1), numOpponents(-1), numFeatures(-1),
lastTrainerMessageTime(-1),
episode_start(true),
server_running(false) server_running(false)
{ {
boost::shared_ptr< AudioMemory > audio_memory( new AudioMemory ); boost::shared_ptr< AudioMemory > audio_memory( new AudioMemory );
...@@ -173,6 +175,8 @@ bool Agent::initImpl(CmdLineParser & cmd_parser) { ...@@ -173,6 +175,8 @@ bool Agent::initImpl(CmdLineParser & cmd_parser) {
rcsc::ParamMap my_params("Additional options"); rcsc::ParamMap my_params("Additional options");
my_params.add()("numTeammates", "", &numTeammates, "number of teammates"); my_params.add()("numTeammates", "", &numTeammates, "number of teammates");
my_params.add()("numOpponents", "", &numOpponents, "number of opponents"); my_params.add()("numOpponents", "", &numOpponents, "number of opponents");
my_params.add()("playingOffense", "", &playingOffense,
"are we playing offense or defense");
cmd_parser.parse(my_params); cmd_parser.parse(my_params);
if (cmd_parser.count("help") > 0) { if (cmd_parser.count("help") > 0) {
...@@ -426,6 +430,27 @@ void Agent::clientHandshake() { ...@@ -426,6 +430,27 @@ void Agent::clientHandshake() {
std::cout << "[Agent Server] Handshake complete" << std::endl; std::cout << "[Agent Server] Handshake complete" << std::endl;
} }
hfo_status_t Agent::getGameStatus() {
hfo_status_t game_status = IN_GAME;
if (audioSensor().trainerMessageTime().cycle() > lastTrainerMessageTime) {
lastTrainerMessageTime = audioSensor().trainerMessageTime().cycle();
const std::string& message = audioSensor().trainerMessage();
if (message.compare("GOAL") == 0) {
game_status = GOAL;
} else if (message.compare("CAPTURED_BY_DEFENSE") == 0) {
game_status = CAPTURED_BY_DEFENSE;
} else if (message.compare("OUT_OF_BOUNDS") == 0) {
game_status = OUT_OF_BOUNDS;
} else if (message.compare("OUT_OF_TIME") == 0) {
game_status = OUT_OF_TIME;
} else {
std::cout << "[Agent Server] Unrecognized Trainer Message: " << message
<< std::endl;
}
}
return game_status;
}
/*! /*!
main decision main decision
virtual method in super class virtual method in super class
...@@ -436,10 +461,14 @@ void Agent::actionImpl() { ...@@ -436,10 +461,14 @@ void Agent::actionImpl() {
clientHandshake(); clientHandshake();
} }
// Update the state features // Update and send the game status
updateStateFeatures(); hfo_status_t game_status = getGameStatus();
if (send(newsockfd, &game_status, sizeof(int), 0) < 0) {
error("[Agent Server] ERROR sending from socket");
}
// Send the state features // Update and send the state features
updateStateFeatures();
if (send(newsockfd, &(feature_vec.front()), if (send(newsockfd, &(feature_vec.front()),
numFeatures * sizeof(float), 0) < 0) { numFeatures * sizeof(float), 0) < 0) {
error("[Agent Server] ERROR sending state features from socket"); error("[Agent Server] ERROR sending state features from socket");
...@@ -463,14 +492,15 @@ void Agent::actionImpl() { ...@@ -463,14 +492,15 @@ void Agent::actionImpl() {
case KICK: case KICK:
this->doKick(action.arg1, action.arg2); this->doKick(action.arg1, action.arg2);
break; break;
case QUIT:
std::cout << "[Agent Server] Got quit from agent." << std::endl;
exit(0);
default: default:
std::cerr << "[Agent Server] ERROR Unsupported Action: " std::cerr << "[Agent Server] ERROR Unsupported Action: "
<< action.action << std::endl; << action.action << std::endl;
exit(1); exit(1);
} }
// TODO: How to get rewards?
// For now let's not worry about turning the neck or setting the vision. // For now let's not worry about turning the neck or setting the vision.
this->setViewAction(new View_Tactical()); this->setViewAction(new View_Tactical());
this->setNeckAction(new Neck_TurnToBallOrScan()); this->setNeckAction(new Neck_TurnToBallOrScan());
......
...@@ -40,7 +40,18 @@ enum action_t ...@@ -40,7 +40,18 @@ enum action_t
DASH, // Dash(power, relative_direction) DASH, // Dash(power, relative_direction)
TURN, // Turn(direction) TURN, // Turn(direction)
TACKLE, // Tackle(direction) TACKLE, // Tackle(direction)
KICK // Kick(power, direction) KICK, // Kick(power, direction)
QUIT // Special action to quit the game
};
// The current status of the HFO game
enum hfo_status_t
{
IN_GAME,
GOAL,
CAPTURED_BY_DEFENSE,
OUT_OF_BOUNDS,
OUT_OF_TIME
}; };
struct Action { struct Action {
...@@ -76,12 +87,16 @@ protected: ...@@ -76,12 +87,16 @@ protected:
// Updated the state features stored in feature_vec // Updated the state features stored in feature_vec
void updateStateFeatures(); void updateStateFeatures();
// Get the current game status
hfo_status_t getGameStatus();
// Add the angle and distance to the landmark to the feature_vec // Add the angle and distance to the landmark to the feature_vec
void addLandmarkFeature(const rcsc::Vector2D& landmark, void addLandmarkFeature(const rcsc::Vector2D& landmark,
const rcsc::Vector2D& self_pos); const rcsc::Vector2D& self_pos);
int numTeammates; int numTeammates;
int numOpponents; int numOpponents;
bool playingOffense; // Are we playing offense or defense?
int numFeatures; // Total number of features int numFeatures; // Total number of features
// Number of features for non-player objects. Clearly this is the answer. // Number of features for non-player objects. Clearly this is the answer.
const static int num_basic_features = 42; const static int num_basic_features = 42;
...@@ -90,6 +105,8 @@ protected: ...@@ -90,6 +105,8 @@ protected:
std::vector<float> feature_vec; // Contains the current features std::vector<float> feature_vec; // Contains the current features
int featIndx; // Feature being populated int featIndx; // Feature being populated
const static int server_port = 6008; const static int server_port = 6008;
long lastTrainerMessageTime; // Last time the trainer sent a message
bool episode_start; // True only in the timestep that the game is starting
// Start the server and listen for a connection. // Start the server and listen for a connection.
virtual void startServer(); virtual void startServer();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment