Commit 7a4eddf1 authored by Matthew Hausknecht

Polished the agent server interface.

parent 44277b38
import socket, struct, thread, time
class Actions:
class HFO_Actions:
''' An enum of the possible HFO actions
Dash(power, relative_direction)
......@@ -9,7 +9,11 @@ class Actions:
Kick(power, direction)
'''
DASH, TURN, TACKLE, KICK = range(4)
DASH, TURN, TACKLE, KICK, QUIT = range(5)
class HFO_Status:
    ''' Outcome codes the agent server reports for the HFO game.

    IN_GAME means the episode is still being played; every other value
    names the reason the episode ended.
    '''
    IN_GAME = 0
    GOAL = 1
    CAPTURED_BY_DEFENSE = 2
    OUT_OF_BOUNDS = 3
    OUT_OF_TIME = 4
class HFOEnvironment(object):
......@@ -17,11 +21,9 @@ class HFOEnvironment(object):
between a learning agent and the Half-Field-Offense domain.
'''
def __init__(self):
self.socket = None # Socket connection to server
self.numFeatures = None # Given by the server in handshake
self.actions = ['DASH', 'TURN', 'TACKLE', 'KICK']
def connectToAgentServer(self, server_port=6008):
'''Connect to the server that controls the agent on the specified port. '''
......@@ -51,6 +53,10 @@ class HFOEnvironment(object):
self.numFeatures = struct.unpack("i", data)[0]
# Send what we received
self.socket.send(struct.pack("i", self.numFeatures))
# Get the current game status
data = self.socket.recv(struct.calcsize("i"))
status = struct.unpack("i", data)[0]
assert status == HFO_Status.IN_GAME, "Status check failed"
print '[Agent Client] Handshake complete'
def getState(self):
......@@ -65,11 +71,14 @@ class HFOEnvironment(object):
return features
def act(self, action):
''' Send an action and recieve the resulting reward from the environment.'''
''' Send an action and receive the game status.'''
self.socket.send(struct.pack("iff", *action))
# TODO: Get the rewards from the domain
return 0
# Get the current game status
data = self.socket.recv(struct.calcsize("i"))
status = struct.unpack("i", data)[0]
return status
def cleanup(self):
''' Close the connection to the agent's server. '''
''' Send a quit and close the connection to the agent's server. '''
self.socket.send(struct.pack("i", HFO_Actions.QUIT))
self.socket.close()
......@@ -26,3 +26,7 @@ By default if your agent takes longer then two seconds to select an action it wi
```bash
./bin/start.py
```
and in a separate terminal
```bash
./examples/hfo_example_agent
```
......@@ -97,10 +97,12 @@ class Trainer(object):
self._agentNumExt = self.convertToExtPlayer(self._agentTeam,
self._agentNumInt)
agentCmd = 'start_agent.sh -t %s -u %i --numTeammates %i --numOpponents %i'\
%(self._agentTeam, self._agentNumExt, numTeammates, numOpponents)
' --playingOffense %i'\
%(self._agentTeam, self._agentNumExt, numTeammates, numOpponents,
self._agent_play_offense)
agentCmd = agentCmd.split(' ')
# Ignore stderr because librcsc continually prints to it
kwargs = {}#'stderr':open('/dev/null','w')}
kwargs = {'stderr':open('/dev/null','w')}
p = subprocess.Popen(agentCmd, **kwargs)
p.wait()
with open('/tmp/start%i' % p.pid,'r') as f:
......@@ -470,7 +472,7 @@ class Trainer(object):
self.resetPlayerPositions()
self.send('(recover)')
self.send('(change_mode play_on)')
self.send('(say RESET)')
# self.send('(say RESET)')
def resetBallPosition(self):
"""Reset the position of the ball for a new HFO trial. """
......@@ -555,16 +557,20 @@ class Trainer(object):
if self.isGoal():
self._numGoals += 1
result = 'Goal'
self.send('(say GOAL)')
elif self.isOOB():
self._numBallsOOB += 1
result = 'Out of Bounds'
self.send('(say OUT_OF_BOUNDS)')
elif team_holding_ball not in [None,self._offenseTeamInd]:
self._numBallsCaptured += 1
result = 'Defense Captured'
self.send('(say CAPTURED_BY_DEFENSE)')
elif self._frame - self._lastFrameBallTouched > self.UNTOUCHED_LENGTH:
self._lastFrameBallTouched = self._frame
self._numOutOfTime += 1
result = 'Ball untouched for too long'
self.send('(say OUT_OF_TIME)')
else:
print '[Trainer] Error: Unable to detect reason for End of Trial!'
sys.exit(1)
......
......@@ -2,7 +2,7 @@
# encoding: utf-8
import subprocess, os, time, numpy, sys
from signal import SIGINT
from signal import SIGKILL
# Global list of all/essential running processes
processes, necProcesses = [], []
......@@ -72,14 +72,14 @@ def main(args, team1='left', team2='right', rng=numpy.random.RandomState()):
# Run HFO
trainer.run(necProcesses)
except KeyboardInterrupt:
print 'Exiting for CTRL-C'
print '[start.py] Exiting for CTRL-C'
finally:
for p in processes:
try:
p.send_signal(SIGINT)
p.send_signal(SIGKILL)
except:
pass
time.sleep(0.1)
time.sleep(0.1)
def parseArgs(args=None):
import argparse
......
......@@ -76,6 +76,7 @@ usage()
echo " --teammate STR name of teammates"
echo " --numTeammates NUM number of teammates"
echo " --numOpponents NUM number of opponents"
echo " --playingOffense [0|1] are we playing offense or defense"
echo " --seed NUM seed for rng"
echo " --gdb runs with gdb on (default:off)"
) 1>&2
......@@ -236,7 +237,7 @@ do
opts="${opts} --teammate ${2}"
shift 1
;;
--numTeammates)
if [ $# -lt 2 ]; then
usage
......@@ -245,7 +246,7 @@ do
opts="${opts} --numTeammates ${2}"
shift 1
;;
--numOpponents)
if [ $# -lt 2 ]; then
usage
......@@ -254,7 +255,16 @@ do
opts="${opts} --numOpponents ${2}"
shift 1
;;
--playingOffense)
if [ $# -lt 2 ]; then
usage
exit 1
fi
opts="${opts} --playingOffense ${2}"
shift 1
;;
--seed)
if [ $# -lt 2 ]; then
usage
......
......@@ -4,21 +4,42 @@
using namespace std;
// First Start the server by calling start.py in bin
// First Start the server: $> bin/start.py
int main() {
// Create the HFO environment
HFOEnvironment hfo;
// Connect the agent's server which should be listening if
// ./bin/start.py was called.
// Connect the agent's server
hfo.connectToAgentServer();
// Continue until finished
while (true) {
// Grab the vector of state features for the current state
const std::vector<float>& feature_vec = hfo.getState();
// Create a dash action
Action a = {DASH, 100., 0.};
// Perform the dash and recieve the reward
float reward = hfo.act(a);
// Play 5 episodes
for (int episode=0; episode<5; episode++) {
hfo_status_t status = IN_GAME;
while (status == IN_GAME) {
// Grab the vector of state features for the current state
const std::vector<float>& feature_vec = hfo.getState();
// Create a dash action
Action a = {DASH, 100., 0.};
// Perform the dash and receive the game status
status = hfo.act(a);
}
// Check what the outcome of the episode was
cout << "Episode " << episode << " ended with status: ";
switch (status) {
case GOAL:
cout << "goal" << endl;
break;
case CAPTURED_BY_DEFENSE:
cout << "captured by defense" << endl;
break;
case OUT_OF_BOUNDS:
cout << "out of bounds" << endl;
break;
case OUT_OF_TIME:
cout << "out of time" << endl;
break;
default:
cout << "Unknown status " << status << endl;
exit(1);
}
}
};
......@@ -3,21 +3,42 @@
import imp
# First Start the server by calling start.py in bin
# First Start the server: $> bin/start.py
if __name__ == '__main__':
# Load the HFO library
hfo_module = imp.load_source('HFO', '../HFO.py')
try:
hfo_module = imp.load_source('HFO', '../HFO.py')
except:
hfo_module = imp.load_source('HFO', 'HFO.py')
# Get the possible actions
actions = hfo_module.Actions
HFO_Actions = hfo_module.HFO_Actions
# Get the possible outcomes
HFO_Status = hfo_module.HFO_Status
# Create the HFO Environment
hfo = hfo_module.HFOEnvironment()
# Connect to the agent server
hfo.connectToAgentServer()
# Continue until finished
while True:
# Grab the state features from the environment
features = hfo.getState()
# Take an action and get the reward
reward = hfo.act((actions.KICK, 100, 12.3))
# Play 5 episodes
for episode in xrange(5):
status = HFO_Status.IN_GAME
while status == HFO_Status.IN_GAME:
# Grab the state features from the environment
features = hfo.getState()
# Take an action and get the game status
status = hfo.act((HFO_Actions.KICK, 100, 12.3))
print 'Episode', episode, 'ended with',
# Check what the outcome of the episode was
if status == HFO_Status.GOAL:
print 'goal'
elif status == HFO_Status.CAPTURED_BY_DEFENSE:
print 'captured by defense'
elif status == HFO_Status.OUT_OF_BOUNDS:
print 'out of bounds'
elif status == HFO_Status.OUT_OF_TIME:
print 'out of time'
else:
print 'Unknown status', status
exit()
# Cleanup when finished
hfo.cleanup()
......@@ -23,9 +23,8 @@ class HFOEnvironment {
// TACKLE, // Tackle(direction)
// KICK // Kick(power, direction)
// };
// Take an action and recieve the resulting reward.
float act(Action action);
// Take an action and receive the resulting game status
hfo_status_t act(Action action);
protected:
int numFeatures; // The number of features in this domain
......
......@@ -16,6 +16,11 @@ void error(const char *msg) {
HFOEnvironment::HFOEnvironment() {}
HFOEnvironment::~HFOEnvironment() {
  // Announce the shutdown to the agent's server with a QUIT action first,
  // then release the socket. Only the action code (one int) is transmitted.
  const action_t quit_action = QUIT;
  const bool send_failed = send(sockfd, &quit_action, sizeof(int), 0) < 0;
  if (send_failed) {
    error("[Agent Client] ERROR sending from socket");
  }
  close(sockfd);
}
......@@ -69,6 +74,15 @@ void HFOEnvironment::handshakeAgentServer() {
if (send(sockfd, &numFeatures, sizeof(int), 0) < 0) {
error("[Agent Client] ERROR sending from socket");
}
// Receive the game status
hfo_status_t status;
if (recv(sockfd, &status, sizeof(hfo_status_t), 0) < 0) {
error("[Agent Client] ERROR recv from socket");
}
if (status != IN_GAME) {
std::cout << "[Agent Client] Handshake failed: status check." << std::endl;
exit(1);
}
std::cout << "[Agent Client] Handshake complete" << std::endl;
}
......@@ -82,9 +96,15 @@ const std::vector<float>& HFOEnvironment::getState() {
return feature_vec;
}
float HFOEnvironment::act(Action action) {
hfo_status_t HFOEnvironment::act(Action action) {
hfo_status_t game_status;
// Send the action
if (send(sockfd, &action, sizeof(Action), 0) < 0) {
error("[Agent Client] ERROR sending from socket");
}
return 0.;
// Get the game status
if (recv(sockfd, &game_status, sizeof(hfo_status_t), 0) < 0) {
error("[Agent Client] ERROR recieving from socket");
}
return game_status;
}
......@@ -110,6 +110,8 @@ Agent::Agent()
M_field_evaluator(createFieldEvaluator()),
M_action_generator(createActionGenerator()),
numTeammates(-1), numOpponents(-1), numFeatures(-1),
lastTrainerMessageTime(-1),
episode_start(true),
server_running(false)
{
boost::shared_ptr< AudioMemory > audio_memory( new AudioMemory );
......@@ -173,6 +175,8 @@ bool Agent::initImpl(CmdLineParser & cmd_parser) {
rcsc::ParamMap my_params("Additional options");
my_params.add()("numTeammates", "", &numTeammates, "number of teammates");
my_params.add()("numOpponents", "", &numOpponents, "number of opponents");
my_params.add()("playingOffense", "", &playingOffense,
"are we playing offense or defense");
cmd_parser.parse(my_params);
if (cmd_parser.count("help") > 0) {
......@@ -426,6 +430,27 @@ void Agent::clientHandshake() {
std::cout << "[Agent Server] Handshake complete" << std::endl;
}
// Translate the most recent trainer announcement into a game status.
// Returns IN_GAME unless a trainer message newer than the last one handled
// names one of the four end-of-episode outcomes.
hfo_status_t Agent::getGameStatus() {
  // Nothing newer than the last message we handled: the game is still on.
  if (audioSensor().trainerMessageTime().cycle() <= lastTrainerMessageTime) {
    return IN_GAME;
  }
  // Remember this message's cycle so it is only interpreted once.
  lastTrainerMessageTime = audioSensor().trainerMessageTime().cycle();

  const std::string& trainer_msg = audioSensor().trainerMessage();
  if (trainer_msg == "GOAL") {
    return GOAL;
  }
  if (trainer_msg == "CAPTURED_BY_DEFENSE") {
    return CAPTURED_BY_DEFENSE;
  }
  if (trainer_msg == "OUT_OF_BOUNDS") {
    return OUT_OF_BOUNDS;
  }
  if (trainer_msg == "OUT_OF_TIME") {
    return OUT_OF_TIME;
  }
  // Any other trainer message is reported and treated as "still playing".
  std::cout << "[Agent Server] Unrecognized Trainer Message: " << trainer_msg
            << std::endl;
  return IN_GAME;
}
/*!
main decision
virtual method in super class
......@@ -436,10 +461,14 @@ void Agent::actionImpl() {
clientHandshake();
}
// Update the state features
updateStateFeatures();
// Update and send the game status
hfo_status_t game_status = getGameStatus();
if (send(newsockfd, &game_status, sizeof(int), 0) < 0) {
error("[Agent Server] ERROR sending from socket");
}
// Send the state features
// Update and send the state features
updateStateFeatures();
if (send(newsockfd, &(feature_vec.front()),
numFeatures * sizeof(float), 0) < 0) {
error("[Agent Server] ERROR sending state features from socket");
......@@ -463,14 +492,15 @@ void Agent::actionImpl() {
case KICK:
this->doKick(action.arg1, action.arg2);
break;
case QUIT:
std::cout << "[Agent Server] Got quit from agent." << std::endl;
exit(0);
default:
std::cerr << "[Agent Server] ERROR Unsupported Action: "
<< action.action << std::endl;
exit(1);
}
// TODO: How to get rewards?
// For now let's not worry about turning the neck or setting the vision.
this->setViewAction(new View_Tactical());
this->setNeckAction(new Neck_TurnToBallOrScan());
......
......@@ -40,7 +40,18 @@ enum action_t
DASH, // Dash(power, relative_direction)
TURN, // Turn(direction)
TACKLE, // Tackle(direction)
KICK // Kick(power, direction)
KICK, // Kick(power, direction)
QUIT // Special action to quit the game
};
// Status of the HFO game as reported to the agent. The numeric values are
// written out because the status is sent over the socket as a plain int.
enum hfo_status_t
{
  IN_GAME             = 0,  // Episode is still being played
  GOAL                = 1,  // Episode ended: a goal was scored
  CAPTURED_BY_DEFENSE = 2,  // Episode ended: the defense captured the ball
  OUT_OF_BOUNDS       = 3,  // Episode ended: the ball went out of bounds
  OUT_OF_TIME         = 4   // Episode ended: ball left untouched for too long
};
struct Action {
......@@ -76,12 +87,16 @@ protected:
// Updated the state features stored in feature_vec
void updateStateFeatures();
// Get the current game status
hfo_status_t getGameStatus();
// Add the angle and distance to the landmark to the feature_vec
void addLandmarkFeature(const rcsc::Vector2D& landmark,
const rcsc::Vector2D& self_pos);
int numTeammates;
int numOpponents;
bool playingOffense; // Are we playing offense or defense?
int numFeatures; // Total number of features
// Number of features for non-player objects. Clearly this is the answer.
const static int num_basic_features = 42;
......@@ -90,6 +105,8 @@ protected:
std::vector<float> feature_vec; // Contains the current features
int featIndx; // Feature being populated
const static int server_port = 6008;
long lastTrainerMessageTime; // Last time the trainer sent a message
bool episode_start; // True only in the timestep that the game is starting
// Start the server and listen for a connection.
virtual void startServer();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment