Commit 09cc8de1 authored by Matthew Hausknecht

Lots of cleanups to the trainer

parent c50fd3e4
...@@ -40,7 +40,7 @@ class Communicator(object): ...@@ -40,7 +40,7 @@ class Communicator(object):
pass pass
finally: finally:
self._sock = None self._sock = None
def sendMsg(self,msg): def sendMsg(self,msg):
#print 'sending',msg #print 'sending',msg
self._sock.sendto(msg + '\0',self._addr) self._sock.sendto(msg + '\0',self._addr)
...@@ -69,7 +69,7 @@ class Communicator(object): ...@@ -69,7 +69,7 @@ class Communicator(object):
self._storedMsg = rest self._storedMsg = rest
#print 'received',msg #print 'received',msg
return msg return msg
def send(self,obj): def send(self,obj):
self.sendMsg(pickle.dumps(obj)) self.sendMsg(pickle.dumps(obj))
...@@ -90,5 +90,3 @@ class ClientCommunicator(Communicator): ...@@ -90,5 +90,3 @@ class ClientCommunicator(Communicator):
except: except:
print >>sys.stderr,'Error creating socket' print >>sys.stderr,'Error creating socket'
raise raise
# vim: ts=2:sw=2:expandtab:
...@@ -2,15 +2,9 @@ ...@@ -2,15 +2,9 @@
# encoding: utf-8 # encoding: utf-8
import sys, numpy, time, os, subprocess, re import sys, numpy, time, os, subprocess, re
from optparse import Values
from signal import SIGINT from signal import SIGINT
from Communicator import ClientCommunicator, TimeoutError from Communicator import ClientCommunicator, TimeoutError
ADHOC_DIR = os.path.dirname(os.path.realpath(__file__))
#os.path.expanduser('~/research/adhoc2/robocup/adhoc-agent/')
#ADHOC_CMD = 'bin/start.sh -t %s -u %i --offenseAgents %s --defenseAgents %s'
ADHOC_CMD = 'start_agent.sh -t %s -u %i'
class DoneError(Exception): class DoneError(Exception):
def __init__(self,msg='unknown'): def __init__(self,msg='unknown'):
self.msg = msg self.msg = msg
...@@ -35,20 +29,19 @@ class DummyPopen(object): ...@@ -35,20 +29,19 @@ class DummyPopen(object):
pass pass
class Trainer(object): class Trainer(object):
# numDefense is excluding the goalie """ Trainer is responsible for setting up the players and game.
def __init__(self,seed=None, options=Values()): """
self._options = options def __init__(self, args, rng=numpy.random.RandomState()):
self._numOffense = self._options.numOffense self._args = args
self._numDefense = self._options.numDefense self._numOffense = self._args.numOffense
self._numDefense = self._args.numDefense
self._teams = [] self._teams = []
self._lastTrialStart = -1 self._lastTrialStart = -1
self._numFrames = 0 self._numFrames = 0
self._lastFrameBallTouched = -1 self._lastFrameBallTouched = -1
self._maxTrials = self._options.numTrials self._maxTrials = self._args.numTrials
self._maxFrames = self._options.numFrames self._maxFrames = self._args.numFrames
self._rng = rng
self._rng = numpy.random.RandomState(seed)
self._playerPositions = numpy.zeros((11,2,2)) self._playerPositions = numpy.zeros((11,2,2))
self._ballPosition = numpy.zeros(2) self._ballPosition = numpy.zeros(2)
self._ballHeld = numpy.zeros((11,2)) self._ballHeld = numpy.zeros((11,2))
...@@ -58,76 +51,92 @@ class Trainer(object): ...@@ -58,76 +51,92 @@ class Trainer(object):
self.HOLD_FACTOR = 1.5 self.HOLD_FACTOR = 1.5
self.PITCH_WIDTH = 68.0 self.PITCH_WIDTH = 68.0
self.PITCH_LENGTH = 105.0 self.PITCH_LENGTH = 105.0
# Trial will end if the ball is untouched for this many steps
self.UNTOUCHED_LENGTH = 100 self.UNTOUCHED_LENGTH = 100
# allowedBallX, allowedBallY defines the usable area of the playfield
self._allowedBallX = numpy.array([-0.1,0.5 * self.PITCH_LENGTH]) self._allowedBallX = numpy.array([-0.1,0.5 * self.PITCH_LENGTH])
self._allowedBallY = numpy.array([-0.5 * self.PITCH_WIDTH,0.5 * self.PITCH_WIDTH]) self._allowedBallY = numpy.array([-0.5 * self.PITCH_WIDTH,
0.5 * self.PITCH_WIDTH])
self._numTrials = 0 self._numTrials = 0
self._numGoals = 0 self._numGoals = 0
self._numBallsCaptured = 0 self._numBallsCaptured = 0
self._numBallsOOB = 0 self._numBallsOOB = 0
# Indicates if a learning agent is active
self._adhocTeam = '' self._agent = not self._args.no_agent
self._adhocNumInt = -1 self._agentTeam = ''
self._adhocNumExt = -1 self._agentNumInt = -1
self._agentNumExt = -1
self._isPlaying = False self._isPlaying = False
self._agentPopen = None
self._adhocPopen = None
self.initMsgHandlers() self.initMsgHandlers()
def launchAdhoc(self): def launch_agent(self):
# start ad hoc agent print '[Trainer] Launching Agent'
os.chdir(ADHOC_DIR) AGENT_DIR = os.path.dirname(os.path.realpath(__file__))
self._adhocTeam = self._offenseTeam if self._options.adhocOffense else self._defenseTeam AGENT_CMD = 'start_agent.sh -t %s -u %i'
if self._options.adhocOffense: os.chdir(AGENT_DIR)
if self._args.play_offense:
assert self._numOffense > 0 assert self._numOffense > 0
self._adhocNumInt = 1 if self._numOffense == 1 \ self._agentTeam = self._offenseTeam
self._agentNumInt = 1 if self._numOffense == 1 \
else self._rng.randint(1, self._numOffense) else self._rng.randint(1, self._numOffense)
else: else:
assert self._numDefense > 0 assert self._numDefense > 0
self._adhocNumInt = 1 if self._numDefense == 1 \ self._agentTeam = self._defenseTeam
else self._rng.randint(1, self._numDefense) self._agentNumInt = 0 if self._numDefense == 1 \
self._adhocNumExt = self.convertToExtPlayer(self._adhocTeam,self._adhocNumInt) else self._rng.randint(0, self._numDefense)
adhocCmd = ADHOC_CMD % (self._adhocTeam, self._adhocNumExt) self._agentNumExt = self.convertToExtPlayer(self._agentTeam,
adhocCmd = adhocCmd.split(' ') self._agentNumInt)
if self._options.learnActions: agentCmd = AGENT_CMD % (self._agentTeam, self._agentNumExt)
adhocCmd += ['--learn-actions',str(self._options.numLearnActions)] agentCmd = agentCmd.split(' ')
print 'AdhocCmd', adhocCmd p = subprocess.Popen(agentCmd)
p = subprocess.Popen(adhocCmd)
p.wait() p.wait()
with open('/tmp/start%i' % p.pid,'r') as f: with open('/tmp/start%i' % p.pid,'r') as f:
output = f.read() output = f.read()
pid = int(re.findall('PID: (\d+)',output)[0]) pid = int(re.findall('PID: (\d+)',output)[0])
return DummyPopen(pid) return DummyPopen(pid)
def getPlayers(self,name): def getDefensiveRoster(self, team_name):
if name == 'Borregos': """Returns a list of player numbers on a given team that are thought
offense = [2,4,6,5,3,7,9,10,8,11] to prefer defense. This map is not set in stone as the players on
defense = [9,10,8,11,7,4,6,2,3,5] some teams can adapt and change their roles.
elif name == 'WrightEagle':
offense = [11,4,7,3,6,10,8,9,2,5]
defense = [5,2,8,9,10,6,3,11,4,7]
else:
offense = [11,7,8,9,10,6,3,2,4,5]
defense = [2,3,4,5,6,7,8,11,9,10]
return offense, defense
"""
if team_name == 'Borregos':
return [9,10,8,11,7,4,6,2,3,5]
elif team_name == 'WrightEagle':
return [5,2,8,9,10,6,3,11,4,7]
else:
return [2,3,4,5,6,7,8,11,9,10]
def getOffensiveRoster(self, team_name):
"""Returns a list of player numbers on a given team that are thought
to prefer offense. This map is not set in stone as the players on
some teams can adapt and change their roles.
"""
if team_name == 'Borregos':
return [2,4,6,5,3,7,9,10,8,11]
elif team_name == 'WrightEagle':
return [11,4,7,3,6,10,8,9,2,5]
else:
return [11,7,8,9,10,6,3,2,4,5]
def setTeams(self): def setTeams(self):
#print 'SETTING TEAMS:',self._teams """ Sets the offensive and defensive teams and player rosters. """
self._offenseTeamInd = 0 self._offenseTeamInd = 0
self._offenseTeam = self._teams[self._offenseTeamInd] self._offenseTeam = self._teams[self._offenseTeamInd]
self._defenseTeam = self._teams[1-self._offenseTeamInd] self._defenseTeam = self._teams[1-self._offenseTeamInd]
o,_ = self.getPlayers(self._offenseTeam) offensive_roster = self.getOffensiveRoster(self._offenseTeam)
_,d = self.getPlayers(self._defenseTeam) defensive_roster = self.getDefensiveRoster(self._defenseTeam)
self._offenseOrder = [1] + o # 1 for goalie self._offenseOrder = [1] + offensive_roster # 1 for goalie
self._defenseOrder = [1] + d # 1 for goalie self._defenseOrder = [1] + defensive_roster # 1 for goalie
def teamToInd(self,team): def teamToInd(self, team_name):
return self._teams.index(team) """ Returns the index of a given team. """
return self._teams.index(team_name)
def parseMsg(self,msg): def parseMsg(self, msg):
assert(msg[0] == '(') assert(msg[0] == '(')
res,ind = self.__parseMsg(msg,1) res,ind = self.__parseMsg(msg,1)
assert(ind == len(msg)),msg assert(ind == len(msg)),msg
...@@ -157,10 +166,11 @@ class Trainer(object): ...@@ -157,10 +166,11 @@ class Trainer(object):
res.append(msg[startInd:ind]) res.append(msg[startInd:ind])
def initComm(self): def initComm(self):
""" Initialize communication to server. """
self._comm = ClientCommunicator(port=6001) self._comm = ClientCommunicator(port=6001)
self.send('(init (version 8.0))') self.send('(init (version 8.0))')
self.checkMsg('(init ok)',retryCount=5) self.checkMsg('(init ok)',retryCount=5)
#self.send('(eye on)') # self.send('(eye on)')
self.send('(ear on)') self.send('(ear on)')
def _hear(self,body): def _hear(self,body):
...@@ -168,9 +178,9 @@ class Trainer(object): ...@@ -168,9 +178,9 @@ class Trainer(object):
if len(playerInfo) != 3: if len(playerInfo) != 3:
return return
_,team,player = playerInfo _,team,player = playerInfo
if team != self._adhocTeam: if team != self._agentTeam:
return return
if int(player) != self._adhocNumExt: if int(player) != self._agentNumExt:
return return
try: try:
length = int(msg[0]) length = int(msg[0])
...@@ -179,17 +189,13 @@ class Trainer(object): ...@@ -179,17 +189,13 @@ class Trainer(object):
msg = msg[1:length+1] msg = msg[1:length+1]
if msg == 'START': if msg == 'START':
if self._isPlaying: if self._isPlaying:
print 'Already playing, ignoring message' print '[Trainer] Already playing, ignoring message'
else: else:
self.startGame() self.startGame()
elif msg == 'RESWI':
self.reset('reset learning action with ball',False,True,True)
elif msg == 'RESWO':
self.reset('reset learning action withOUT ball',False,True,False)
elif msg == 'DONE': elif msg == 'DONE':
raise DoneError raise DoneError
else: else:
print 'Unhandled message from ad hoc agent: %s' % msg print '[Trainer] Unhandled message from agent: %s' % msg
def initMsgHandlers(self): def initMsgHandlers(self):
self._msgHandlers = [] self._msgHandlers = []
...@@ -212,7 +218,7 @@ class Trainer(object): ...@@ -212,7 +218,7 @@ class Trainer(object):
def checkMsg(self,expectedMsg,retryCount=None): def checkMsg(self,expectedMsg,retryCount=None):
msg = self.recv(retryCount) msg = self.recv(retryCount)
if msg != expectedMsg: if msg != expectedMsg:
print >>sys.stderr,'Error with message' print >>sys.stderr,'[Trainer] Error with message'
print >>sys.stderr,' expected: %s' % expectedMsg print >>sys.stderr,' expected: %s' % expectedMsg
print >>sys.stderr,' received: %s' % msg print >>sys.stderr,' received: %s' % msg
print >>sys.stderr,len(expectedMsg),len(msg) print >>sys.stderr,len(expectedMsg),len(msg)
...@@ -221,7 +227,9 @@ class Trainer(object): ...@@ -221,7 +227,9 @@ class Trainer(object):
def extractPoint(self,msg): def extractPoint(self,msg):
return numpy.array(map(float,msg[:2])) return numpy.array(map(float,msg[:2]))
def convertToExtPlayer(self,team,num): def convertToExtPlayer(self, team, num):
""" Returns the external player number for a given player. """
assert team == self._offenseTeam or team == self._defenseTeam
if team == self._offenseTeam: if team == self._offenseTeam:
return self._offenseOrder[num] return self._offenseOrder[num]
else: else:
...@@ -250,15 +258,18 @@ class Trainer(object): ...@@ -250,15 +258,18 @@ class Trainer(object):
self._playerPositions[playerNum,:,team] = self.extractPoint(objData) self._playerPositions[playerNum,:,team] = self.extractPoint(objData)
def registerMsgHandler(self,handler,*args,**kwargs): def registerMsgHandler(self,handler,*args,**kwargs):
'''register a message handler '''Register a message handler.
handler will be called on a message that matches *args'''
Handler will be called on a message that matches *args.
'''
args = list(args) args = list(args)
i,_,_ = self._findHandlerInd(args) i,_,_ = self._findHandlerInd(args)
if i < 0: if i < 0:
self._msgHandlers.append([args,handler]) self._msgHandlers.append([args,handler])
else: else:
if ('quiet' not in kwargs) or (not kwargs['quiet']): if ('quiet' not in kwargs) or (not kwargs['quiet']):
print 'Updating handler for %s' % (' '.join(args)) print '[Trainer] Updating handler for %s' % (' '.join(args))
self._msgHandlers[i] = [args,handler] self._msgHandlers[i] = [args,handler]
def unregisterMsgHandler(self,*args): def unregisterMsgHandler(self,*args):
...@@ -277,7 +288,7 @@ class Trainer(object): ...@@ -277,7 +288,7 @@ class Trainer(object):
def handleMsg(self,msg): def handleMsg(self,msg):
i,prefixLength,handler = self._findHandlerInd(msg) i,prefixLength,handler = self._findHandlerInd(msg)
if i < 0: if i < 0:
print 'Unhandled message:',msg[0:2] print '[Trainer] Unhandled message:',msg[0:2]
else: else:
handler(msg[prefixLength:]) handler(msg[prefixLength:])
...@@ -330,20 +341,22 @@ class Trainer(object): ...@@ -330,20 +341,22 @@ class Trainer(object):
self.ignoreMsg(*partial,quiet=True) self.ignoreMsg(*partial,quiet=True)
def startGame(self): def startGame(self):
self.reset('Start',False) """ Starts a game of HFO. """
self.registerMsgHandler(self.seeGlobal,'see_global') self.reset()
self.registerMsgHandler(self.seeGlobal,'ok','look',quiet=True) self.registerMsgHandler(self.seeGlobal, 'see_global')
self.registerMsgHandler(self.seeGlobal, 'ok', 'look', quiet=True)
#self.registerMsgHandler(self.checkBall,'ok','check_ball') #self.registerMsgHandler(self.checkBall,'ok','check_ball')
self.send('(look)') self.send('(look)')
self._isPlaying = True self._isPlaying = True
def calcBallHolder(self): def calcBallHolder(self):
'''calculates the ball holder, returns results in teamInd,playerInd''' '''Calculates the ball holder, returns results in teamInd, playerInd. '''
totalHeld = 0 totalHeld = 0
for team in self._teams: for team in self._teams:
for i in range(11): for i in range(11):
pos = self._playerPositions[i,:,self.teamToInd(team)] pos = self._playerPositions[i,:,self.teamToInd(team)]
distBound = self._SP['kickable_margin'] + self._SP['player_size'] + self._SP['ball_size'] distBound = self._SP['kickable_margin'] + self._SP['player_size'] \
+ self._SP['ball_size']
distBound *= self.HOLD_FACTOR distBound *= self.HOLD_FACTOR
if numpy.linalg.norm(self._ballPosition - pos) < distBound: if numpy.linalg.norm(self._ballPosition - pos) < distBound:
self._ballHeld[i,self.teamToInd(team)] += 1 self._ballHeld[i,self.teamToInd(team)] += 1
...@@ -360,90 +373,75 @@ class Trainer(object): ...@@ -360,90 +373,75 @@ class Trainer(object):
return None,None return None,None
def isGoal(self): def isGoal(self):
return (self._ballPosition[0] > self._allowedBallX[1]) and (numpy.abs(self._ballPosition[1]) < self._SP['goal_width']) """ Returns true if a goal has been scored. """
return (self._ballPosition[0] > self._allowedBallX[1]) \
and (numpy.abs(self._ballPosition[1]) < self._SP['goal_width'])
def isOOB(self): def isOOB(self):
# check ball x """ Returns true if the ball is out of bounds. """
#return self._ballPosition[0] < self._allowedBallX[0] return self._ballPosition[0] < self._allowedBallX[0] \
if (self._ballPosition[0] < self._allowedBallX[0]) or (self._ballPosition[0] > self._allowedBallX[1]): or self._ballPosition[0] > self._allowedBallX[1] \
return True or self._ballPosition[1] < self._allowedBallY[0] \
# check ball y or self._ballPosition[1] > self._allowedBallY[1]
if (self._ballPosition[1] < self._allowedBallY[0]) or (self._ballPosition[1] > self._allowedBallY[1]):
return True def movePlayer(self, team, internal_num, pos, convertToExt=True):
return False """ Move a player to a specified position.
def movePlayer(self,team,num,pos,convertToExt=True): Args:
if convertToExt: team: the team name of the player
num = self.convertToExtPlayer(team,num) interal_num: the player's internal number
self.send('(move (player %s %i) %f %f)' % (team,num,pos[0],pos[1])) pos: position to move player to
convertToExt: convert interal player num to external
def moveBall(self,pos): """
num = self.convertToExtPlayer(team, internal_num) if convertToExt \
else internal_num
self.send('(move (player %s %i) %f %f)' % (team, num, pos[0], pos[1]))
def moveBall(self, pos):
""" Moves the ball to a specified x,y position. """
self.send('(move (ball) %f %f 0.0 0.0 0.0)' % tuple(pos)) self.send('(move (ball) %f %f 0.0 0.0 0.0)' % tuple(pos))
def randomPosInBounds(self,xBounds,yBounds): def randomPointInBounds(self, xBounds=None, yBounds=None):
"""Returns a random point inside of the box defined by xBounds,
yBounds. Where xBounds=[x_min, x_max] and yBounds=[y_min,
y_max]. Defaults to the xy-bounds of the playable HFO area.
"""
if xBounds is None:
xBounds = self.allowedBallX
if yBounds is None:
yBounds = self.allowedBallY
pos = numpy.zeros(2) pos = numpy.zeros(2)
bounds = [xBounds,yBounds] bounds = [xBounds, yBounds]
for i in range(2): for i in range(2):
pos[i] = self._rng.rand() * (bounds[i][1] - bounds[i][0]) + bounds[i][0] pos[i] = self._rng.rand() * (bounds[i][1] - bounds[i][0]) + bounds[i][0]
return pos return pos
def boundPoint(self,pos): def boundPoint(self, pos):
# x """Ensures a point is within the minimum and maximum bounds of the
if pos[0] < self._allowedBallX[0]: HFO playing area.
pos[0] = self._allowedBallX[0]
elif pos[0] > self._allowedBallX[1]:
pos[0] = self._allowedBallX[1]
# y
if pos[1] < self._allowedBallY[0]:
pos[1] = self._allowedBallY[0]
elif pos[1] > self._allowedBallY[1]:
pos[1] = self._allowedBallY[1]
return pos
def reset(self,msg,inc=True,learnActionReset=False,adhocAgentHasBall=False): """
if inc: pos[0] = min(max(pos[0], self._allowedBallX[0]), self._allowedBallX[1])
self._numTrials += 1 pos[1] = min(max(pos[1], self._allowedBallY[0]), self._allowedBallY[1])
self._numFrames += self._frame - self._lastTrialStart return pos
if not learnActionReset:
print '%2i /%2i %5i %s' % (self._numGoals,self._numTrials,self._frame,msg)
if (self._maxTrials > 0) and (self._numTrials >= self._maxTrials):
raise DoneError
if (self._maxFrames > 0) and (self._numFrames >= self._maxFrames):
raise DoneError
if learnActionReset: def reset(self):
self.resetPositionsActionLearning(adhocAgentHasBall) """ Resets the HFO domain by moving the ball and players. """
else: self.resetBallPosition()
self.resetPositions() self.resetPlayerPositions()
self.send('(recover)') self.send('(recover)')
self.send('(change_mode play_on)') self.send('(change_mode play_on)')
self.send('(say RESET)') self.send('(say RESET)')
self._lastTrialStart = self._frame
def resetPositionsActionLearning(self,adhocAgentHasBall): def resetBallPosition(self):
for i in range(1, self._numDefense): """Reset the position of the ball for a new HFO trial. """
if adhocAgentHasBall and (not self._options.adhocOffense) and (i == self._adhocNumInt): continue self._ballPosition = self.boundPoint(self.randomPointInBounds(
pos = self.boundPoint(self.randomPosInBounds(self._allowedBallX,self._allowedBallY)) .2*self._allowedBallX+.05*self.PITCH_LENGTH, .8*self._allowedBallY))
self.movePlayer(self._defenseTeam,i,pos)
for i in range(1, self._numOffense):
if adhocAgentHasBall and self._options.adhocOffense and (i == self._adhocNumInt): continue
pos = self.boundPoint(self.randomPosInBounds(self._allowedBallX,self._allowedBallY))
self.movePlayer(self._offenseTeam,i,pos)
self._ballPosition = self.boundPoint(self.randomPosInBounds(self._allowedBallX,self._allowedBallY))
self.moveBall(self._ballPosition) self.moveBall(self._ballPosition)
if adhocAgentHasBall:
# start the agent with the ball in the kickable margin def getOffensiveResetPosition(self):
r = self._rng.rand() * self._SP['kickable_margin'] """ Returns a random position for an offensive player. """
a = self._rng.rand() * 2 * numpy.pi
offset = r * numpy.array([numpy.cos(a),numpy.sin(a)])
self.movePlayer(self._adhocTeam,self._adhocNumExt,self._ballPosition + offset,convertToExt=False)
def resetPositions(self):
print 'in reset position'
self._ballPosition = self.boundPoint(self.randomPosInBounds(0.20 * self._allowedBallX + 0.05 * self.PITCH_LENGTH,0.8 * self._allowedBallY))
self.moveBall(self._ballPosition)
# set up offense
self.movePlayer(self._offenseTeam,0,[-0.5 * self.PITCH_LENGTH,0])
offsets = [ offsets = [
[-1,-1], [-1,-1],
[-1,1], [-1,1],
...@@ -456,77 +454,129 @@ class Trainer(object): ...@@ -456,77 +454,129 @@ class Trainer(object):
[2,2], [2,2],
[2,-2], [2,-2],
] ]
for i,o in zip(range(1,self._numOffense),offsets): offset = offsets[self._rng.randint(len(offsets))]
offset = 0.1 * self.PITCH_LENGTH * self._rng.rand(2) + 0.1 * self.PITCH_LENGTH * numpy.array(o) offset_from_ball = 0.1 * self.PITCH_LENGTH * self._rng.rand(2) + \
pos = self.boundPoint(self._ballPosition + offset) 0.1 * self.PITCH_LENGTH * numpy.array(offset)
self.movePlayer(self._offenseTeam,i,pos) return self.boundPoint(self._ballPosition + offset_from_ball)
# set up defense
self.movePlayer(self._defenseTeam,0,[0.5 * self.PITCH_LENGTH,0]) def getDefensiveResetPosition(self):
for i in range(1,self._numDefense): """ Returns a random position for a defensive player. """
pos = self.boundPoint(self.randomPosInBounds([0.5 * 0.5 * self.PITCH_LENGTH,0.75 * 0.5 * self.PITCH_LENGTH],0.8 * self._allowedBallY)) return self.boundPoint(self.randomPointInBounds(
self.movePlayer(self._defenseTeam,i,pos) [0.5 * 0.5 * self.PITCH_LENGTH, 0.75 * 0.5 * self.PITCH_LENGTH],
0.8 * self._allowedBallY))
def nullifyOtherPlayers(self):
# offense def resetPlayerPositions(self):
for i in range(self._numOffense,11): """Reset the positions of the players. This is called after a trial
self.movePlayer(self._offenseTeam,i,[-0.25 * self.PITCH_LENGTH,0]) ends to setup for the next trial.
# defense
for i in range(self._numDefense,11): """
self.movePlayer(self._defenseTeam,i,[-0.25 * self.PITCH_LENGTH,0]) # Always Move the offensive goalie
self.movePlayer(self._offenseTeam, 0, [-0.5 * self.PITCH_LENGTH, 0])
# Move the rest of the offense
for i in xrange(1, self._numOffense + 1):
self.movePlayer(self._offenseTeam, i, self.getOffensiveResetPosition())
# Move the defensive goalie
if self._numDefense > 0:
self.movePlayer(self._defenseTeam, 0, [0.5 * self.PITCH_LENGTH,0])
# Move the rest of the defense
for i in xrange(1, self._numDefense):
self.movePlayer(self._defenseTeam, i, self.getDefensiveResetPosition())
def removeNonHFOPlayers(self):
"""Removes players that aren't involved in HFO game.
The players whose numbers are greater than numOffense/numDefense
are sent to left-field.
"""
for i in xrange(self._numOffense + 1, 11):
# Don't remove the learning agent
if not self._agent or i != self._agentNumInt or \
self._agentTeam != self._offenseTeam:
self.movePlayer(self._offenseTeam, i, [-0.25 * self.PITCH_LENGTH, 0])
for i in xrange(self._numDefense, 11):
# Don't remove the learning agent
if not self._agent or i != self._agentNumInt or \
self._agentTeam != self._defenseTeam:
self.movePlayer(self._defenseTeam, i, [-0.25 * self.PITCH_LENGTH, 0])
def step(self): def step(self):
#print 'step',self._frame """ Takes a simulated step. """
#self.send('(check_ball)') # self.send('(check_ball)')
self.nullifyOtherPlayers() self.removeNonHFOPlayers()
heldTeam,heldPlayer = self.calcBallHolder() team_holding_ball, player_holding_ball = self.calcBallHolder()
if heldTeam is not None: if team_holding_ball is not None:
self._lastFrameBallTouched = self._frame self._lastFrameBallTouched = self._frame
if self.trialOver(team_holding_ball):
self.updateResults(team_holding_ball)
self.reset()
# don't reset too fast, stuff is still happening def updateResults(self, team_holding_ball):
if self._frame - self._lastTrialStart < 5: """ Updates the various members after a trial has ended. """
return
if not self._options.learnActions:
self.doResetIfNeeded(heldTeam)
def doResetIfNeeded(self,heldTeam):
if self.isGoal(): if self.isGoal():
self._numGoals += 1 self._numGoals += 1
self.reset('Goal') result = 'Goal'
return elif self.isOOB():
if self.isOOB():
self._numBallsOOB += 1 self._numBallsOOB += 1
self.reset('Out of bounds') result = 'Out of Bounds'
return elif team_holding_ball not in [None,self._offenseTeamInd]:
if heldTeam not in [None,self._offenseTeamInd]:
self._numBallsCaptured += 1 self._numBallsCaptured += 1
self.reset('Defense captured') result = 'Defense Captured'
return elif self._frame - self._lastFrameBallTouched > self.UNTOUCHED_LENGTH:
if self._frame - self._lastFrameBallTouched > self.UNTOUCHED_LENGTH:
self._lastFrameBallTouched = self._frame self._lastFrameBallTouched = self._frame
self.reset('Untouched for too long',False) result = 'Ball untouched for too long'
return else:
print '[Trainer] Error: Unable to detect reason for End of Trial!'
sys.exit(1)
print '[Trainer] EndOfTrial: %2i /%2i %5i %s'%\
(self._numGoals, self._numTrials, self._frame, result)
self._numTrials += 1
self._numFrames += self._frame - self._lastTrialStart
self._lastTrialStart = self._frame
if (self._maxTrials > 0) and (self._numTrials >= self._maxTrials):
raise DoneError
if (self._maxFrames > 0) and (self._numFrames >= self._maxFrames):
raise DoneError
def trialOver(self, team_holding_ball):
"""Returns true if the trial has ended for one of the following
reasons: Goal scored, out of bounds, captured by defense, or
untouched for too long.
"""
# The trial is still being setup, it cannot be over.
if self._frame - self._lastTrialStart < 5:
return False
return self.isGoal() \
or self.isOOB() \
or (team_holding_ball not in [None, self._offenseTeamInd]) \
or (self._frame - self._lastFrameBallTouched > self.UNTOUCHED_LENGTH)
def printStats(self): def printStats(self):
print ''
print 'Num frames in completed trials : %i' % (self._numFrames) print 'Num frames in completed trials : %i' % (self._numFrames)
print 'Trials : %i' % self._numTrials print 'Trials : %i' % self._numTrials
print 'Goals : %i' % self._numGoals print 'Goals : %i' % self._numGoals
print 'Defense Captured : %i' % self._numBallsCaptured print 'Defense Captured : %i' % self._numBallsCaptured
print 'Balls Out of Bounds: %i' % self._numBallsOOB print 'Balls Out of Bounds: %i' % self._numBallsOOB
def checkLive(self,necProcesses): def checkLive(self, necProcesses):
"""Returns true if each of the necessary processes is still alive and
running.
"""
for p,name in necProcesses: for p,name in necProcesses:
if p.poll() is not None: if p.poll() is not None:
print 'Something necessary closed (%s), exiting' % name print '[Trainer] Something necessary closed (%s), exiting' % name
return False return False
return True return True
def run(self,necProcesses): def run(self, necProcesses):
""" Run the trainer.
"""
try: try:
if self._options.useAdhoc: if self._agent:
self._adhocPopen = self.launchAdhoc() self._agentPopen = self.launch_agent()
necProcesses.append([self._adhocPopen,'adhoc']) necProcesses.append([self._agentPopen,'agent'])
print 'starting game'
self.startGame() self.startGame()
while self.checkLive(necProcesses): while self.checkLive(necProcesses):
prevFrame = self._frame prevFrame = self._frame
...@@ -534,26 +584,15 @@ class Trainer(object): ...@@ -534,26 +584,15 @@ class Trainer(object):
if self._frame != prevFrame: if self._frame != prevFrame:
self.step() self.step()
except TimeoutError: except TimeoutError:
print 'Haven\'t heard from the server for too long, Exiting' print '[Trainer] Haven\'t heard from the server for too long, Exiting'
except (KeyboardInterrupt, DoneError): except (KeyboardInterrupt, DoneError):
print 'Exiting' print '[Trainer] Exiting'
finally: finally:
if self._adhocPopen is not None: if self._agentPopen is not None:
self._adhocPopen.send_signal(SIGINT) self._agentPopen.send_signal(SIGINT)
try: try:
self._comm.sendMsg('(bye)') self._comm.sendMsg('(bye)')
except: except:
pass pass
self._comm.close() self._comm.close()
self.printStats() self.printStats()
if __name__ == '__main__':
#seed = int(time.time())
assert(len(sys.argv) == 1)
#team1 = sys.argv[1]
#team2 = sys.argv[2]
seed = int(sys.argv[1])
print 'Random Seed:',seed
t = Trainer(seed=seed)
t.run()
#!/usr/bin/env python #!/usr/bin/env python
# encoding: utf-8 # encoding: utf-8
import subprocess, os, time, numpy import subprocess, os, time, numpy, sys
from signal import SIGINT from signal import SIGINT
# UT_AGENT_DIR = '/u/mhauskn/projects/hfo/bin/' # Global list of all/essential running processes
OTHER_AGENT_DIR = '/projects/agents2/villasim/opponents2D/' processes, necProcesses = [], []
# Command to run the rcssserver. Edit as needed.
SERVER_CMD = 'rcssserver server::port=6000 server::coach_port=6001 server::olcoach_port=6002 server::coach=1 server::game_log_dir=/tmp server::text_log_dir=/tmp' SERVER_CMD = 'rcssserver server::port=6000 server::coach_port=6001 \
server::olcoach_port=6002 server::coach=1 server::game_log_dir=/tmp \
server::text_log_dir=/tmp'
# Command to run the monitor. Edit as needed.
MONITOR_CMD = 'rcssmonitor' MONITOR_CMD = 'rcssmonitor'
def getAgentDirCmd(name,first): def getAgentDirCmd(name, first):
if name == 'ut': """ Returns the team name, command, and directory to run a team. """
if first: cmd = './start.sh -t %s' % name
name = 'ut1' dir = os.path.dirname(os.path.realpath(__file__))
else: return name, cmd, dir
name = 'ut2'
cmd = './start.sh -t %s' % name
dir = os.path.dirname(os.path.realpath(__file__))
elif name == 'base':
dir = os.path.join(OTHER_AGENT_DIR,name,'src')
if first:
name = 'base1'
else:
name = 'base2'
cmd = './start.sh -t %s' % name
else:
cmd = './start.sh'
dir = os.path.join(OTHER_AGENT_DIR,name)
return name,cmd,dir
#team2 = 'oxsy' # fcportugal2d, gdut-tiji, marlik, nadco-2d, warthog def launch(cmd, necessary=True, supressOutput=True, name='Unknown'):
"""Launch a process.
Appends to list of processes and (optionally) necProcesses if
necessary flag is True.
processes = [] Returns: The launched process.
necProcesses = []
def launch(cmd,necessary=True,supressOutput=True,name='Unknown'): """
kwargs = {} kwargs = {}
if supressOutput: if supressOutput:
kwargs = {'stdout':open('/dev/null','w'),'stderr':open('/dev/null','w')} kwargs = {'stdout':open('/dev/null','w'),'stderr':open('/dev/null','w')}
...@@ -46,42 +37,39 @@ def launch(cmd,necessary=True,supressOutput=True,name='Unknown'): ...@@ -46,42 +37,39 @@ def launch(cmd,necessary=True,supressOutput=True,name='Unknown'):
necProcesses.append([p,name]) necProcesses.append([p,name])
return p return p
def main(team1,team2,rng,options): def main(team1, team2, rng, args):
"""Sets up the teams, launches the server and monitor, starts the
trainer.
"""
serverOptions = '' serverOptions = ''
if options.sync: if args.sync:
serverOptions += ' server::synch_mode=on' serverOptions += ' server::synch_mode=on'
team1, team1Cmd, team1Dir = getAgentDirCmd(team1, True)
team1,team1Cmd,team1Dir = getAgentDirCmd(team1,True) team2, team2Cmd, team2Dir = getAgentDirCmd(team2, False)
team2,team2Cmd,team2Dir = getAgentDirCmd(team2,False) assert os.path.isdir(team1Dir)
if not os.path.isdir(team1Dir): assert os.path.isdir(team2Dir)
print 'Bad team 1: %s' % team1
sys.exit(1)
if not os.path.isdir(team2Dir):
print 'Bad team 2: %s' % team2
sys.exit(1)
try: try:
launch(SERVER_CMD + serverOptions,name='server') # Launch the Server
launch(SERVER_CMD + serverOptions, name='server')
time.sleep(0.2) time.sleep(0.2)
if not options.headless: if not args.headless:
launch(MONITOR_CMD,name='monitor') launch(MONITOR_CMD,name='monitor')
# Launch the Trainer
# launch trainer
from Trainer import Trainer from Trainer import Trainer
seed = rng.randint(numpy.iinfo('i').max) trainer = Trainer(args=args, rng=rng)
trainer = Trainer(seed=seed,options=options)
trainer.initComm() trainer.initComm()
# start team 1 # Start Team1
os.chdir(team1Dir) os.chdir(team1Dir)
launch(team1Cmd,False) launch(team1Cmd,False)
trainer.waitOnTeam(True) # wait to make sure of team order trainer.waitOnTeam(True) # wait to make sure of team order
# start team 2 # Start Team2
os.chdir(team2Dir) os.chdir(team2Dir)
launch(team2Cmd,False) launch(team2Cmd,False)
trainer.waitOnTeam(False) trainer.waitOnTeam(False)
# make sure all players are connected # Make sure all players are connected
trainer.checkIfAllPlayersConnected() trainer.checkIfAllPlayersConnected()
trainer.setTeams() trainer.setTeams()
# run # Run HFO
trainer.run(necProcesses) trainer.run(necProcesses)
except KeyboardInterrupt: except KeyboardInterrupt:
print 'Exiting for CTRL-C' print 'Exiting for CTRL-C'
...@@ -91,42 +79,27 @@ def main(team1,team2,rng,options): ...@@ -91,42 +79,27 @@ def main(team1,team2,rng,options):
p.send_signal(SIGINT) p.send_signal(SIGINT)
except: except:
pass pass
#print 'Done killing children (hopefully)'
time.sleep(0.1) time.sleep(0.1)
if __name__ == '__main__': if __name__ == '__main__':
import sys import argparse
p = argparse.ArgumentParser(description='Start Half Field Offense.')
from optparse import OptionParser p.add_argument('--headless', dest='headless', action='store_true',
help='Run without a monitor')
p = OptionParser('''Usage: ./startHFO.py [team1 [team2]] p.add_argument('--trials', dest='numTrials', type=int, default=-1,
teams are ut or the ones in the agents directory''') help='Number of trials to run')
p.add_option('-s','--no-sync',dest='sync',action='store_false',default=True,help='run server in non-sync mode') p.add_argument('--frames', dest='numFrames', type=int, default=-1,
p.add_option('--headless',dest='headless',action='store_true',default=False,help='run in headless mode') help='Number of frames to run for')
p.add_option('-a','--adhoc',dest='useAdhoc',action='store_true',default=False,help='use an adhoc agent') p.add_argument('--offense', dest='numOffense', type=int, default=4,
p.add_option('-d','--adhocDefense',dest='adhocOffense',action='store_false',default=True,help='put the ad hoc agent on defense') help='Number of offensive players')
p.add_option('-n','--numTrials',dest='numTrials',action='store',type='int',default=-1,help='number of trials to run') p.add_argument('--defense', dest='numDefense', type=int, default=4,
p.add_option('-f','--frames',dest='numFrames',action='store',type='int',default=-1,help='number of frames to run for') help='Number of defensive players')
p.add_option('--offense',dest='numOffense',action='store',type='int',default=4,help='number of offensive players') p.add_argument('--play-defense', dest='play_offense',
p.add_option('--defense',dest='numDefense',action='store',type='int',default=4,help='number of defensive players (excluding the goalie)') action='store_false', default=True,
p.add_option('--learn-actions',dest='numLearnActions',action='store',type='int',default=0,help='number of instances to learn actions instead of the regular behavior') help='Put the learning agent on defensive team')
p.add_argument('--no-agent', dest='no_agent', action='store_true',
options,args = p.parse_args() help='Don\'t use a learning agent.')
if len(args) > 2: p.add_argument('--no-sync', dest='sync', action='store_false', default=True,
print 'Incorrect number of arguments' help='Run server in non-sync mode')
p.parse_args(['--help']) args = p.parse_args()
sys.exit(2) main(team1='left', team2='right', rng=numpy.random.RandomState(), args=args)
options.learnActions = (options.numLearnActions > 0)
team1 = 'ut'
team2 = 'ut'
if len(args) >= 1:
team1 = args[0]
if len(args) >= 2:
team2 = args[1]
seed = int(time.time())
rng = numpy.random.RandomState(seed)
main(team1,team2,rng,options)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment