Commit 86cf7950 authored by Matthew Hausknecht's avatar Matthew Hausknecht

Refactored arguments and support for multiple agents.

parent 70a2b0ed
...@@ -31,16 +31,18 @@ class DummyPopen(object): ...@@ -31,16 +31,18 @@ class DummyPopen(object):
class Trainer(object): class Trainer(object):
""" Trainer is responsible for setting up the players and game. """ Trainer is responsible for setting up the players and game.
""" """
def __init__(self, args, rng=numpy.random.RandomState()): def __init__(self, args, rng=numpy.random.RandomState(), server_port=6001,
coach_port=6002):
self._rng = rng # The Random Number Generator self._rng = rng # The Random Number Generator
self._serverPort = args.port + 1 # The port the server is listening on self._serverPort = server_port # The port the server is listening on
self._coachPort = args.port + 2 # The coach port to talk with the server self._coachPort = coach_port # The coach port to talk with the server
self._logDir = args.logDir # Directory to store logs self._logDir = args.logDir # Directory to store logs
self._record = args.record # Record states + actions self._record = args.record # Record states + actions
self._numOffense = args.numOffense # Number offensive players self._numOffense = args.offenseAgents + args.offenseNPCs # Number offensive players
self._numDefense = args.numDefense # Number defensive players self._numDefense = args.defenseAgents + args.defenseNPCs # Number defensive players
self._maxTrials = args.numTrials # Maximum number of trials to play self._maxTrials = args.numTrials # Maximum number of trials to play
self._maxFrames = args.numFrames # Maximum number of frames to play self._maxFrames = args.numFrames # Maximum number of frames to play
self._maxFramesPerTrial = args.maxFramesPerTrial
# =============== FIELD DIMENSIONS =============== # # =============== FIELD DIMENSIONS =============== #
self.NUM_FRAMES_TO_HOLD = 2 # Hold ball this many frames to capture self.NUM_FRAMES_TO_HOLD = 2 # Hold ball this many frames to capture
self.HOLD_FACTOR = 1.5 # Gain to calculate ball control self.HOLD_FACTOR = 1.5 # Gain to calculate ball control
...@@ -62,51 +64,60 @@ class Trainer(object): ...@@ -62,51 +64,60 @@ class Trainer(object):
self._numBallsOOB = 0 # Trials in which ball went out of bounds self._numBallsOOB = 0 # Trials in which ball went out of bounds
self._numOutOfTime = 0 # Trials that ran out of time self._numOutOfTime = 0 # Trials that ran out of time
# =============== AGENT =============== # # =============== AGENT =============== #
self._agent = not args.no_agent # Indicates if a learning agent is active self._numAgents = args.offenseAgents + args.defenseAgents
self._agent_play_offense = args.play_offense # Agent's role self._offenseAgents = args.offenseAgents
self._agentTeam = '' # Name of the team the agent is playing for self._defenseAgents = args.defenseAgents
self._agentNumInt = -1 # Agent's internal team number self._agentTeams = [] # Names of the teams the agents are playing for
self._agentNumExt = -1 # Agent's external team number self._agentNumInt = [] # List of agents internal team numbers
self._agentServerPort = args.port # Port for agent's server self._agentNumExt = [] # List of agents external team numbers
self._agentServerPort = args.port # Base Port for agent's server
self._agentOnBall = args.agent_on_ball # If true, agent starts with the ball self._agentOnBall = args.agent_on_ball # If true, agent starts with the ball
# =============== MISC =============== # # =============== MISC =============== #
self._offenseTeam = '' # Name of the offensive team self._offenseTeamName = '' # Name of the offensive team
self._defenseTeam = '' # Name of the defensive team self._defenseTeamName = '' # Name of the defensive team
self._playerPositions = numpy.zeros((11,2,2)) # Positions of the players self._playerPositions = numpy.zeros((11,2,2)) # Positions of the players
self._ballPosition = numpy.zeros(2) # Position of the ball self._ballPosition = numpy.zeros(2) # Position of the ball
self._ballHeld = numpy.zeros((11,2)) # Track player holding the ball self._ballHeld = numpy.zeros((11,2)) # Track player holding the ball
self._teams = [] # Team indexes for offensive and defensive teams self._teams = [] # Team indexes for offensive and defensive teams
self._SP = {} # Sever Parameters. Recieved when connecting to the server. self._SP = {} # Sever Parameters. Recieved when connecting to the server.
self._isPlaying = False # Is a game being played? self._isPlaying = False # Is a game being played?
self._teamHoldingBall = None # Team currently in control of the ball
self._playerHoldingBall = None # Player current in control of ball
self._agentPopen = [] # Agent's processes self._agentPopen = [] # Agent's processes
self.initMsgHandlers() self.initMsgHandlers()
def launch_agent(self, player_num): def launch_agent(self, agent_num, play_offense, port):
"""Launch the learning agent using the start.sh script and return a """Launch the learning agent using the start_agent.sh script and
DummyPopen for the process. return a DummyPopen for the process.
""" """
print '[Trainer] Launching Agent'+str(player_num) print '[Trainer] Launching Agent', str(agent_num)
if self._agent_play_offense: if play_offense:
assert self._numOffense > 0 assert self._numOffense > 0
self._agentTeam = self._offenseTeam team_name = self._offenseTeamName
self._agentNumInt = player_num + 1 self._agentTeams.append(team_name)
# First offense number is reserved for inactive offensive goalie
internal_player_num = agent_num + 1
self._agentNumInt.append(internal_player_num)
numTeammates = self._numOffense - 1 numTeammates = self._numOffense - 1
numOpponents = self._numDefense numOpponents = self._numDefense
else: else:
assert self._numDefense > 0 assert self._numDefense > 0
self._agentTeam = self._defenseTeam team_name = self._defenseTeamName
self._agentNumInt = player_num self._agentTeams.append(team_name)
numTeammates = self._numOffense internal_player_num = agent_num
numOpponents = self._numDefense - 1 self._agentNumInt.append(internal_player_num)
self._agentNumExt = self.convertToExtPlayer(self._agentTeam, numTeammates = self._numDefense - 1
self._agentNumInt) numOpponents = self._numOffense
ext_num = self.convertToExtPlayer(team_name, internal_player_num)
self._agentNumExt.append(ext_num)
binary_dir = os.path.dirname(os.path.realpath(__file__)) binary_dir = os.path.dirname(os.path.realpath(__file__))
agentCmd = 'start_agent.sh -t %s -u %i -p %i -P %i --log-dir %s'\ agentCmd = 'start_agent.sh -t %s -u %i -p %i -P %i --log-dir %s'\
' --numTeammates %i --numOpponents %i'\ ' --numTeammates %i --numOpponents %i'\
' --playingOffense %i --serverPort %i'\ ' --playingOffense %i --serverPort %i'\
%(self._agentTeam, self._agentNumExt, self._serverPort, %(team_name, ext_num, self._serverPort,
self._coachPort, self._logDir, numTeammates, numOpponents, self._coachPort, self._logDir, numTeammates,
self._agent_play_offense, (self._agentServerPort - player_num)) numOpponents, play_offense, port)
if self._record: if self._record:
agentCmd += ' --record' agentCmd += ' --record'
agentCmd = os.path.join(binary_dir, agentCmd) agentCmd = os.path.join(binary_dir, agentCmd)
...@@ -152,10 +163,10 @@ class Trainer(object): ...@@ -152,10 +163,10 @@ class Trainer(object):
def setTeams(self): def setTeams(self):
""" Sets the offensive and defensive teams and player rosters. """ """ Sets the offensive and defensive teams and player rosters. """
self._offenseTeamInd = 0 self._offenseTeamInd = 0
self._offenseTeam = self._teams[self._offenseTeamInd] self._offenseTeamName = self._teams[self._offenseTeamInd]
self._defenseTeam = self._teams[1-self._offenseTeamInd] self._defenseTeamName = self._teams[1-self._offenseTeamInd]
offensive_roster = self.getOffensiveRoster(self._offenseTeam) offensive_roster = self.getOffensiveRoster(self._offenseTeamName)
defensive_roster = self.getDefensiveRoster(self._defenseTeam) defensive_roster = self.getDefensiveRoster(self._defenseTeamName)
self._offenseOrder = [1] + offensive_roster # 1 for goalie self._offenseOrder = [1] + offensive_roster # 1 for goalie
self._defenseOrder = [1] + defensive_roster # 1 for goalie self._defenseOrder = [1] + defensive_roster # 1 for goalie
...@@ -208,10 +219,6 @@ class Trainer(object): ...@@ -208,10 +219,6 @@ class Trainer(object):
if len(playerInfo) != 3: if len(playerInfo) != 3:
return return
_,team,player = playerInfo _,team,player = playerInfo
if team != self._agentTeam:
return
if int(player) != self._agentNumExt:
return
try: try:
length = int(msg[0]) length = int(msg[0])
except: except:
...@@ -264,15 +271,17 @@ class Trainer(object): ...@@ -264,15 +271,17 @@ class Trainer(object):
def convertToExtPlayer(self, team, num): def convertToExtPlayer(self, team, num):
""" Returns the external player number for a given player. """ """ Returns the external player number for a given player. """
assert team == self._offenseTeam or team == self._defenseTeam assert team == self._offenseTeamName or team == self._defenseTeamName,\
if team == self._offenseTeam: 'Invalid team name %s. Valid choices: %s, %s.'\
%(team, self._offenseTeamName, self._defenseTeamName)
if team == self._offenseTeamName:
return self._offenseOrder[num] return self._offenseOrder[num]
else: else:
return self._defenseOrder[num] return self._defenseOrder[num]
def convertFromExtPlayer(self, team, num): def convertFromExtPlayer(self, team, num):
""" Maps external player number to internal player number. """ """ Maps external player number to internal player number. """
if team == self._offenseTeam: if team == self._offenseTeamName:
return self._offenseOrder.index(num) return self._offenseOrder.index(num)
else: else:
return self._defenseOrder.index(num) return self._defenseOrder.index(num)
...@@ -437,9 +446,18 @@ class Trainer(object): ...@@ -437,9 +446,18 @@ class Trainer(object):
or self._ballPosition[1] < self._allowedBallY[0] \ or self._ballPosition[1] < self._allowedBallY[0] \
or self._ballPosition[1] > self._allowedBallY[1] or self._ballPosition[1] > self._allowedBallY[1]
def isCaptured(self):
""" Returns true if the ball is captured by defense. """
return self._teamHoldingBall not in [None,self._offenseTeamInd]
def isOOT(self):
""" Returns true if the trial has run out of time. """
return self._frame - self._lastFrameBallTouched > self.UNTOUCHED_LENGTH \
or (self._maxFramesPerTrial > 0 and self._frame -
self._lastTrialStart > self._maxFramesPerTrial)
def movePlayer(self, team, internal_num, pos, convertToExt=True): def movePlayer(self, team, internal_num, pos, convertToExt=True):
""" Move a player to a specified position. """ Move a player to a specified position.
Args: Args:
team: the team name of the player team: the team name of the player
interal_num: the player's internal number interal_num: the player's internal number
...@@ -524,20 +542,20 @@ class Trainer(object): ...@@ -524,20 +542,20 @@ class Trainer(object):
ends to setup for the next trial. ends to setup for the next trial.
""" """
# Always Move the offensive goalie # Always Move the offensive goalie to the left goal
self.movePlayer(self._offenseTeam, 0, [-0.5 * self.PITCH_LENGTH, 0]) self.movePlayer(self._offenseTeamName, 0, [-0.5 * self.PITCH_LENGTH, 0])
# Move the rest of the offense # Move the rest of the offense
for i in xrange(1, self._numOffense + 1): for i in xrange(1, self._numOffense + 1):
self.movePlayer(self._offenseTeam, i, self.getOffensiveResetPosition()) self.movePlayer(self._offenseTeamName, i, self.getOffensiveResetPosition())
# Move the agent to the ball # Move the agent to the ball
if self._agent and self._agentOnBall: if self._agentOnBall and self._offenseAgents > 0:
self.movePlayer(self._offenseTeam, 1, self._ballPosition) self.movePlayer(self._offenseTeamName, self._agentNumInt[0], self._ballPosition)
# Move the defensive goalie # Move the defensive goalie
if self._numDefense > 0: if self._numDefense > 0:
self.movePlayer(self._defenseTeam, 0, [0.5 * self.PITCH_LENGTH,0]) self.movePlayer(self._defenseTeamName, 0, [0.5 * self.PITCH_LENGTH,0])
# Move the rest of the defense # Move the rest of the defense
for i in xrange(1, self._numDefense): for i in xrange(1, self._numDefense):
self.movePlayer(self._defenseTeam, i, self.getDefensiveResetPosition()) self.movePlayer(self._defenseTeamName, i, self.getDefensiveResetPosition())
def removeNonHFOPlayers(self): def removeNonHFOPlayers(self):
"""Removes players that aren't involved in HFO game. """Removes players that aren't involved in HFO game.
...@@ -546,30 +564,28 @@ class Trainer(object): ...@@ -546,30 +564,28 @@ class Trainer(object):
are sent to left-field. are sent to left-field.
""" """
offensive_agent_numbers = self._agentNumInt[:self._offenseAgents]
defensive_agent_numbers = self._agentNumInt[self._offenseAgents:]
for i in xrange(self._numOffense + 1, 11): for i in xrange(self._numOffense + 1, 11):
# Don't remove the learning agent if i not in offensive_agent_numbers:
if not self._agent or i != self._agentNumInt or \ self.movePlayer(self._offenseTeamName, i, [-0.25 * self.PITCH_LENGTH, 0])
self._agentTeam != self._offenseTeam:
self.movePlayer(self._offenseTeam, i, [-0.25 * self.PITCH_LENGTH, 0])
for i in xrange(self._numDefense, 11): for i in xrange(self._numDefense, 11):
# Don't remove the learning agent if i not in defensive_agent_numbers:
if not self._agent or i != self._agentNumInt or \ self.movePlayer(self._defenseTeamName, i, [-0.25 * self.PITCH_LENGTH, 0])
self._agentTeam != self._defenseTeam:
self.movePlayer(self._defenseTeam, i, [-0.25 * self.PITCH_LENGTH, 0])
def step(self): def step(self):
""" Takes a simulated step. """ """ Takes a simulated step. """
# self.send('(check_ball)') # self.send('(check_ball)')
self.removeNonHFOPlayers() self.removeNonHFOPlayers()
team_holding_ball, player_holding_ball = self.calcBallHolder() self._teamHoldingBall, self._playerHoldingBall = self.calcBallHolder()
if team_holding_ball is not None: if self._teamHoldingBall is not None:
self._lastFrameBallTouched = self._frame self._lastFrameBallTouched = self._frame
if self.trialOver(team_holding_ball): if self.trialOver():
self.updateResults(team_holding_ball) self.updateResults()
self._lastFrameBallTouched = self._frame self._lastFrameBallTouched = self._frame
self.reset() self.reset()
def updateResults(self, team_holding_ball): def updateResults(self):
""" Updates the various members after a trial has ended. """ """ Updates the various members after a trial has ended. """
if self.isGoal(): if self.isGoal():
self._numGoals += 1 self._numGoals += 1
...@@ -579,11 +595,11 @@ class Trainer(object): ...@@ -579,11 +595,11 @@ class Trainer(object):
self._numBallsOOB += 1 self._numBallsOOB += 1
result = 'Out of Bounds' result = 'Out of Bounds'
self.send('(say OUT_OF_BOUNDS)') self.send('(say OUT_OF_BOUNDS)')
elif team_holding_ball not in [None,self._offenseTeamInd]: elif self.isCaptured():
self._numBallsCaptured += 1 self._numBallsCaptured += 1
result = 'Defense Captured' result = 'Defense Captured'
self.send('(say CAPTURED_BY_DEFENSE)') self.send('(say CAPTURED_BY_DEFENSE)')
elif self._frame - self._lastFrameBallTouched > self.UNTOUCHED_LENGTH: elif self.isOOT():
self._numOutOfTime += 1 self._numOutOfTime += 1
result = 'Ball untouched for too long' result = 'Ball untouched for too long'
self.send('(say OUT_OF_TIME)') self.send('(say OUT_OF_TIME)')
...@@ -600,7 +616,7 @@ class Trainer(object): ...@@ -600,7 +616,7 @@ class Trainer(object):
if (self._maxFrames > 0) and (self._numFrames >= self._maxFrames): if (self._maxFrames > 0) and (self._numFrames >= self._maxFrames):
raise DoneError raise DoneError
def trialOver(self, team_holding_ball): def trialOver(self):
"""Returns true if the trial has ended for one of the following """Returns true if the trial has ended for one of the following
reasons: Goal scored, out of bounds, captured by defense, or reasons: Goal scored, out of bounds, captured by defense, or
untouched for too long. untouched for too long.
...@@ -609,10 +625,7 @@ class Trainer(object): ...@@ -609,10 +625,7 @@ class Trainer(object):
# The trial is still being setup, it cannot be over. # The trial is still being setup, it cannot be over.
if self._frame - self._lastTrialStart < 5: if self._frame - self._lastTrialStart < 5:
return False return False
return self.isGoal() \ return self.isGoal() or self.isOOB() or self.isCaptured() or self.isOOT()
or self.isOOB() \
or (team_holding_ball not in [None, self._offenseTeamInd]) \
or (self._frame - self._lastFrameBallTouched > self.UNTOUCHED_LENGTH)
def printStats(self): def printStats(self):
print 'Num frames in completed trials : %i' % (self._numFrames) print 'Num frames in completed trials : %i' % (self._numFrames)
...@@ -634,18 +647,18 @@ class Trainer(object): ...@@ -634,18 +647,18 @@ class Trainer(object):
return True return True
def run(self, necProcesses): def run(self, necProcesses):
""" Run the trainer. """ Run the trainer """
"""
try: try:
if self._agent: for agent_num in xrange(self._offenseAgents):
if self._agent_play_offense: port = self._agentServerPort + agent_num
for i in xrange(self._numOffense): agent = self.launch_agent(agent_num, play_offense=True, port=port)
self._agentPopen.append(self.launch_agent(i)) self._agentPopen.append(agent)
necProcesses.append([self._agentPopen[i], 'agent'+str(i)]) necProcesses.append([agent, 'offense_agent_' + str(agent_num)])
else: for agent_num in xrange(self._defenseAgents):
for i in xrange(self._numDefense): port = self._agentServerPort + agent_num + self._offenseAgents
self._agentPopen.append(self.launch_agent(i)) agent = self.launch_agent(agent_num, play_offense=False, port=port)
necProcesses.append([self._agentPopen[i], 'agent'+str(i)]) self._agentPopen.append(agent)
necProcesses.append([agent, 'defense_agent_' + str(agent_num)])
self.startGame() self.startGame()
while self.checkLive(necProcesses): while self.checkLive(necProcesses):
prevFrame = self._frame prevFrame = self._frame
......
...@@ -45,10 +45,11 @@ def main(args, team1='left', team2='right', rng=numpy.random.RandomState()): ...@@ -45,10 +45,11 @@ def main(args, team1='left', team2='right', rng=numpy.random.RandomState()):
""" """
if not os.path.exists(args.logDir): if not os.path.exists(args.logDir):
os.makedirs(args.logDir) os.makedirs(args.logDir)
num_agents = args.offenseAgents + args.defenseAgents
binary_dir = os.path.dirname(os.path.realpath(__file__)) binary_dir = os.path.dirname(os.path.realpath(__file__))
server_port = args.port + 1 server_port = args.port + num_agents
coach_port = args.port + 2 coach_port = args.port + num_agents + 1
olcoach_port = args.port + 3 olcoach_port = args.port + num_agents + 2
serverOptions = ' server::port=%i server::coach_port=%i ' \ serverOptions = ' server::port=%i server::coach_port=%i ' \
'server::olcoach_port=%i server::coach=1 ' \ 'server::olcoach_port=%i server::coach=1 ' \
'server::game_logging=%i server::text_logging=%i ' \ 'server::game_logging=%i server::text_logging=%i ' \
...@@ -76,7 +77,8 @@ def main(args, team1='left', team2='right', rng=numpy.random.RandomState()): ...@@ -76,7 +77,8 @@ def main(args, team1='left', team2='right', rng=numpy.random.RandomState()):
launch(MONITOR_CMD + monitorOptions, name='monitor') launch(MONITOR_CMD + monitorOptions, name='monitor')
# Launch the Trainer # Launch the Trainer
from Trainer import Trainer from Trainer import Trainer
trainer = Trainer(args=args, rng=rng) trainer = Trainer(args=args, rng=rng, server_port=server_port,
coach_port=coach_port)
trainer.initComm() trainer.initComm()
# Start Team1 # Start Team1
launch(team1Cmd,False) launch(team1Cmd,False)
...@@ -109,15 +111,17 @@ def parseArgs(): ...@@ -109,15 +111,17 @@ def parseArgs():
help='Number of trials to run') help='Number of trials to run')
p.add_argument('--frames', dest='numFrames', type=int, default=-1, p.add_argument('--frames', dest='numFrames', type=int, default=-1,
help='Number of frames to run for') help='Number of frames to run for')
p.add_argument('--offense', dest='numOffense', type=int, default=4, p.add_argument('--frames-per-trial', dest='maxFramesPerTrial', type=int,
help='Number of offensive players') default=1000, help='Max number of frames per trial. '\
p.add_argument('--defense', dest='numDefense', type=int, default=4, 'Negative values mean unlimited.')
help='Number of defensive players') p.add_argument('--offense-agents', dest='offenseAgents', type=int, default=1,
p.add_argument('--play-defense', dest='play_offense', help='Number of offensive agents')
action='store_false', default=True, p.add_argument('--defense-agents', dest='defenseAgents', type=int, default=0,
help='Put the learning agent on defensive team') help='Number of defensive agents')
p.add_argument('--no-agent', dest='no_agent', action='store_true', p.add_argument('--offense-npcs', dest='offenseNPCs', type=int, default=0,
help='Don\'t use a learning agent.') help='Number of offensive uncontrolled players')
p.add_argument('--defense-npcs', dest='defenseNPCs', type=int, default=0,
help='Number of defensive uncontrolled players')
p.add_argument('--no-sync', dest='sync', action='store_false', default=True, p.add_argument('--no-sync', dest='sync', action='store_false', default=True,
help='Run server in non-sync mode') help='Run server in non-sync mode')
p.add_argument('--port', dest='port', type=int, default=6000, p.add_argument('--port', dest='port', type=int, default=6000,
...@@ -134,17 +138,10 @@ def parseArgs(): ...@@ -134,17 +138,10 @@ def parseArgs():
p.add_argument('--fullstate', dest='fullstate', action='store_true', p.add_argument('--fullstate', dest='fullstate', action='store_true',
help='Server provides full-state information to agents.') help='Server provides full-state information to agents.')
args = p.parse_args() args = p.parse_args()
if args.numOffense not in xrange(1, 11): assert args.offenseAgents + args.offenseNPCs in xrange(0, 11), \
p.error('argument --offense: invalid choice: ' + str(args.numOffense) + 'Invalid number of offensive players: (choose from [0,10])'
' (choose from [1-10])') assert args.defenseAgents + args.defenseNPCs in xrange(0, 12), \
if args.play_offense: 'Invalid number of defensive players: (choose from [0,11])'
if args.numDefense not in xrange(0, 12):
p.error('argument --defense: invalid choice: ' + str(args.numDefense) +
' (choose from [0-11])')
else:
if args.numDefense not in xrange(1, 12):
p.error('argument --defense: invalid choice: ' + str(args.numDefense) +
' (choose from [1-11])')
return args return args
if __name__ == '__main__': if __name__ == '__main__':
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment