Commit df411ad4 authored by Matthew Hausknecht's avatar Matthew Hausknecht

Merge pull request #15 from mhauskn/mid_level_actions

Mid level actions
parents 24dc458f 682d0d7e
...@@ -71,6 +71,9 @@ bin/soccerwindow2 ...@@ -71,6 +71,9 @@ bin/soccerwindow2
example/hfo_example_agent example/hfo_example_agent
example/high_level_random_agent example/high_level_random_agent
example/low_level_random_agent example/low_level_random_agent
example/mid_level_move_agent
example/mid_level_kick_agent
example/mid_level_dribble_agent
# Dependency directories # Dependency directories
librcsc-prefix/ librcsc-prefix/
......
...@@ -109,6 +109,21 @@ target_link_libraries(low_level_random_agent hfo-lib) ...@@ -109,6 +109,21 @@ target_link_libraries(low_level_random_agent hfo-lib)
add_executable(high_level_random_agent ${CMAKE_CURRENT_SOURCE_DIR}/example/high_level_random_agent.cpp) add_executable(high_level_random_agent ${CMAKE_CURRENT_SOURCE_DIR}/example/high_level_random_agent.cpp)
set_target_properties(high_level_random_agent PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/example) set_target_properties(high_level_random_agent PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/example)
target_link_libraries(high_level_random_agent hfo-lib) target_link_libraries(high_level_random_agent hfo-lib)
add_executable(mid_level_move_agent ${CMAKE_CURRENT_SOURCE_DIR}/example/mid_level_move_agent.cpp)
set_target_properties(mid_level_move_agent PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/example)
target_link_libraries(mid_level_move_agent hfo-lib)
add_executable(mid_level_kick_agent ${CMAKE_CURRENT_SOURCE_DIR}/example/mid_level_kick_agent.cpp)
set_target_properties(mid_level_kick_agent PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/example)
target_link_libraries(mid_level_kick_agent hfo-lib)
add_executable(mid_level_dribble_agent ${CMAKE_CURRENT_SOURCE_DIR}/example/mid_level_dribble_agent.cpp)
set_target_properties(mid_level_dribble_agent PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/example)
target_link_libraries(mid_level_dribble_agent hfo-lib)
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/example DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/ USE_SOURCE_PERMISSIONS) install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/example DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/ USE_SOURCE_PERMISSIONS)
install(DIRECTORY ${RCSSSERVER_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/bin DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/ USE_SOURCE_PERMISSIONS) install(DIRECTORY ${RCSSSERVER_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/bin DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/ USE_SOURCE_PERMISSIONS)
...@@ -48,13 +48,14 @@ def main(args, team1='left', team2='right'): ...@@ -48,13 +48,14 @@ def main(args, team1='left', team2='right'):
'server::fullstate_l=%i server::fullstate_r=%i ' \ 'server::fullstate_l=%i server::fullstate_r=%i ' \
'server::coach_w_referee=1 server::hfo_max_trial_time=%i ' \ 'server::coach_w_referee=1 server::hfo_max_trial_time=%i ' \
'server::hfo_max_trials=%i server::hfo_max_frames=%i ' \ 'server::hfo_max_trials=%i server::hfo_max_frames=%i ' \
'server::hfo_offense_on_ball=%i server::random_seed=%i' \ 'server::hfo_offense_on_ball=%i server::random_seed=%i ' \
'server::hfo_max_untouched_time=%i' \
%(server_port, coach_port, olcoach_port, %(server_port, coach_port, olcoach_port,
args.logging, args.logging, args.logging, args.logging, args.logging, args.logging,
args.logDir, args.logDir, args.logDir, args.logDir, args.logDir, args.logDir,
args.sync, args.fullstate, args.fullstate, args.sync, args.fullstate, args.fullstate,
args.maxFramesPerTrial, args.numTrials, args.numFrames, args.maxFramesPerTrial, args.numTrials, args.numFrames,
args.offenseOnBall, args.seed) args.offenseOnBall, args.seed, args.maxUntouchedTime)
# server::record_messages=on -- useful for debug # server::record_messages=on -- useful for debug
try: try:
# Launch the Server # Launch the Server
...@@ -101,6 +102,9 @@ def parseArgs(): ...@@ -101,6 +102,9 @@ def parseArgs():
p.add_argument('--frames-per-trial', dest='maxFramesPerTrial', type=int, p.add_argument('--frames-per-trial', dest='maxFramesPerTrial', type=int,
default=1000, help='Max number of frames per trial. '\ default=1000, help='Max number of frames per trial. '\
'Negative values mean unlimited.') 'Negative values mean unlimited.')
p.add_argument('--untouched-time', dest='maxUntouchedTime', type=int,
default=100, help='Ends trial if ball is untouched for this long. '\
'Negative values mean unlimited.')
p.add_argument('--offense-agents', dest='offenseAgents', type=int, default=0, p.add_argument('--offense-agents', dest='offenseAgents', type=int, default=0,
help='Number of offensive agents') help='Number of offensive agents')
p.add_argument('--defense-agents', dest='defenseAgents', type=int, default=0, p.add_argument('--defense-agents', dest='defenseAgents', type=int, default=0,
......
No preview for this file type
...@@ -164,9 +164,9 @@ A seed may be specified as follows:\\ ...@@ -164,9 +164,9 @@ A seed may be specified as follows:\\
This seed will determine the placement of the players and the ball at This seed will determine the placement of the players and the ball at
the beginning of each episode. Due to non-determinism in the player the beginning of each episode. Due to non-determinism in the player
policies and server, it is not sufficient to precisely replicate full policies, it is not sufficient to precisely replicate full games. It
games. It \textit{only} replicates the starting conditions for each \textit{only} replicates the starting conditions for each episode. The
episode. The player's behavior, observations, and physics all proceed player's behavior, observations, and physics all proceed
stochastically. stochastically.
\section{State Spaces} \section{State Spaces}
...@@ -193,16 +193,33 @@ are encoded a floating point values and normalized to the range of ...@@ -193,16 +193,33 @@ are encoded a floating point values and normalized to the range of
[-1,1]. Invalid features are given a value of -2. The features are as [-1,1]. Invalid features are given a value of -2. The features are as
follows: follows:
\begin{figure}[htp]
\centering
\includegraphics[width=.7\textwidth]{figures/playfieldCoords}
\caption{\textbf{Normalized Coordinates in the HFO play field}:
These coordinates are used for reporting the agent's position in
the high-level feature set as well as specifying targets for the
mid-level actions (Section \ref{sec:mid_level_actions}). The
red rectangle shows the boundaries of the reported positions,
which exceed the play field boundaries by 10\% in each
direction. Positions exceeding this rectangle are bounded (via
min/max) to the edges of the rectangle. All distance features are
normalized against the max HFO distance shown in orange.}
\label{fig:playfieldCoords}
\end{figure}
\subsubsection{High Level State Feature List} \subsubsection{High Level State Feature List}
\begin{enumerate} \begin{enumerate}
\setcounter{enumi}{-1} \setcounter{enumi}{-1}
\item{\textbf{X position} - The agent’s x position on the field.} \item{\textbf{X position} - The agent’s normalized x-position on the
\item{\textbf{Y position} - The agent’s y position on the field.} field. See Figure \ref{fig:playfieldCoords}.}
\item{\textbf{Y position} - The agent’s normalized y-position on the
field. See Figure \ref{fig:playfieldCoords}.}
\item{\textbf{Orientation} - The direction that the agent is facing.} \item{\textbf{Orientation} - The direction that the agent is facing.}
\item{\textbf{Ball Distance} - Distance to the ball.} \item{\textbf{Ball Distance} - Normalized distance to the ball.}
\item{\textbf{Ball Angle} - Angle to the ball.} \item{\textbf{Ball Angle} - Angle to the ball.}
\item{\textbf{Able to Kick} - Boolean indicating if the agent can kick the ball.} \item{\textbf{Able to Kick} - Boolean indicating if the agent can kick the ball.}
\item{\textbf{Goal Center Distance} - Distance from the agent to the center of the goal.} \item{\textbf{Goal Center Distance} - Normalized distance from the agent to the center of the goal.}
\item{\textbf{Goal Center Angle} - Angle from the agent to the center of the goal.} \item{\textbf{Goal Center Angle} - Angle from the agent to the center of the goal.}
\item{\textbf{Goal Opening Angle} - The size of the largest open angle \item{\textbf{Goal Opening Angle} - The size of the largest open angle
of the agent to the goal, shown as $\theta_g$ in Figure of the agent to the goal, shown as $\theta_g$ in Figure
...@@ -210,11 +227,11 @@ follows: ...@@ -210,11 +227,11 @@ follows:
\item [$T$] {\textbf{Teammate i's Goal Opening Angle} - For each \item [$T$] {\textbf{Teammate i's Goal Opening Angle} - For each
teammate i: i’s goal opening angle. Invalid if agent is not playing teammate i: i’s goal opening angle. Invalid if agent is not playing
offense.} offense.}
\item [$1$] {\textbf{Distance to Opponent} - If an opponent is present, \item [$1$] {\textbf{Distance to Opponent} - If an opponent is
distance to the closest opponent. This feature is absent if there present, normalized distance to the closest opponent. This feature
are no opponents.} is absent if there are no opponents.}
\item [$T$] {\textbf{Distance from Teammate i to Opponent} - For each \item [$T$] {\textbf{Distance from Teammate i to Opponent} - For each
teammate i: the distance from the teammate to the closest teammate i: the normalized distance from the teammate to the closest
opponent. This feature is absent if there are no opponents. If opponent. This feature is absent if there are no opponents. If
teammates are present but not detected, this feature is considered teammates are present but not detected, this feature is considered
invalid and given the value of -2.} invalid and given the value of -2.}
...@@ -223,8 +240,8 @@ follows: ...@@ -223,8 +240,8 @@ follows:
\ref{fig:openAngle}. If teammates are present but not detected, this \ref{fig:openAngle}. If teammates are present but not detected, this
feature is considered invalid and given the value of -2.} feature is considered invalid and given the value of -2.}
\item [$3T$] {\textbf{Distance, Angle, and Uniform Number of \item [$3T$] {\textbf{Distance, Angle, and Uniform Number of
Teammates} - For each teammate i: the distance, angle, and uniform Teammates} - For each teammate i: the normalized distance, angle,
number of that teammate.} and uniform number of that teammate.}
\end{enumerate} \end{enumerate}
There are a total of $9 + 5*\textrm{num\_teammates}$ features with an There are a total of $9 + 5*\textrm{num\_teammates}$ features with an
...@@ -243,7 +260,7 @@ opponent is present. ...@@ -243,7 +260,7 @@ opponent is present.
\subsection {Low Level Feature Set} \subsection {Low Level Feature Set}
The state features used by HFO are designed with the mindset of The state features used by HFO are designed with the mindset of
providing an overcomplete, basic, egocentric viewpoint. The features providing an over-complete, basic, egocentric viewpoint. The features
are basic in the sense that they provide distances and angles to are basic in the sense that they provide distances and angles to
relevant points of interest, but do not include higher level relevant points of interest, but do not include higher level
perceptions such as the largest angle between a goal post and perceptions such as the largest angle between a goal post and
...@@ -421,6 +438,7 @@ are available through the same interface. It is the responsibility of ...@@ -421,6 +438,7 @@ are available through the same interface. It is the responsibility of
the user to faithfully report which action spaces were used. the user to faithfully report which action spaces were used.
\subsection{Low Level Actions} \subsection{Low Level Actions}
\label{sec:low_level_actions}
\begin{itemize} \begin{itemize}
\item{\textbf{Dash}(power, degrees): Moves the agent with power [-100, \item{\textbf{Dash}(power, degrees): Moves the agent with power [-100,
100] where negative values move backwards. The relative direction 100] where negative values move backwards. The relative direction
...@@ -435,11 +453,28 @@ the user to faithfully report which action spaces were used. ...@@ -435,11 +453,28 @@ the user to faithfully report which action spaces were used.
\item{\textbf{Kick}(power, degrees): Kick the ball with power [0, 100] \item{\textbf{Kick}(power, degrees): Kick the ball with power [0, 100]
in relative direction [-180, 180]. Has no effect if the agent does in relative direction [-180, 180]. Has no effect if the agent does
not possess the ball.} not possess the ball.}
\item{\textbf{Quit}: Indicates to the agent server that you wish to \end{itemize}
terminate the HFO environment.}
\subsection{Mid Level Actions}
\label{sec:mid_level_actions}
\begin{itemize}
\item{\textbf{Kick$\_$To}(target$_x$, target$_y$, speed): Kicks the
ball to the specified target point with the desired speed. Valid
values for target$_{x,y} \in [-1,1]$ and speed $\in [0,3]$.}
\item{\textbf{Move$\_$To}(target$_x$, target$_y$): Moves to the
specified target point using the max dash speed. Valid values for
target$_{x,y} \in [-1,1]$.}
\item{\textbf{Dribble$\_$To}(target$_x$, target$_y$): Dribbles the
ball to the specified target point. Attempts to fetch the ball if
the agent doesn't already possess it. Performs some checks to avoid
opponents and keeps good control of the ball. Valid values for
target$_{x,y} \in [-1,1]$.}
\item{\textbf{Intercept}(): Moves to intercept the ball, taking into
account the ball velocity. More efficient than chasing the ball.}
\end{itemize} \end{itemize}
\subsection{High Level Actions} \subsection{High Level Actions}
\label{sec:high_level_actions}
\begin{itemize} \begin{itemize}
\item{\textbf{Move}(): Re-positions the agent according to the \item{\textbf{Move}(): Re-positions the agent according to the
strategy given by Agent2D. The \textit{move} command works only when strategy given by Agent2D. The \textit{move} command works only when
...@@ -456,6 +491,13 @@ the user to faithfully report which action spaces were used. ...@@ -456,6 +491,13 @@ the user to faithfully report which action spaces were used.
combination of short kicks and moves.} combination of short kicks and moves.}
\end{itemize} \end{itemize}
\subsection{Special Actions}
\begin{itemize}
\item{\textbf{NO-OP}: Indicates that the agent should take no action.}
\item{\textbf{Quit}: Indicates to the agent server that you wish to
terminate the HFO environment.}
\end{itemize}
\section{Developing a New Agent} \section{Developing a New Agent}
New agents may be developed in C++ or Python. In Python, as long as New agents may be developed in C++ or Python. In Python, as long as
......
...@@ -11,9 +11,9 @@ except: ...@@ -11,9 +11,9 @@ except:
exit() exit()
def get_random_action(): def get_random_action():
""" Returns a random high-level action """ """Returns a random high-level action. Pass is omitted for simplicity."""
high_lv_actions = [HFO_Actions.SHOOT, HFO_Actions.PASS, HFO_Actions.DRIBBLE] high_lv_actions = [HFO_Actions.SHOOT, HFO_Actions.DRIBBLE]
return (random.choice(high_lv_actions), 0, 0) return random.choice(high_lv_actions)
def play_hfo(num): def play_hfo(num):
""" Method called by a thread to play 5 games of HFO """ """ Method called by a thread to play 5 games of HFO """
...@@ -27,7 +27,7 @@ def play_hfo(num): ...@@ -27,7 +27,7 @@ def play_hfo(num):
if state[5] == 1: #state[5] is 1 when player has the ball if state[5] == 1: #state[5] is 1 when player has the ball
status = hfo_env.act(get_random_action()) status = hfo_env.act(get_random_action())
else: else:
status = hfo_env.act((HFO_Actions.MOVE, 0, 0)) status = hfo_env.act(HFO_Actions.MOVE)
except: except:
pass pass
finally: finally:
......
...@@ -22,9 +22,8 @@ int main() { ...@@ -22,9 +22,8 @@ int main() {
// Get the vector of state features for the current state // Get the vector of state features for the current state
const std::vector<float>& feature_vec = hfo.getState(); const std::vector<float>& feature_vec = hfo.getState();
// Create a dash action // Create a dash action
Action a = {DASH, 20.0, 0.0};
// Perform the dash and recieve the current game status // Perform the dash and recieve the current game status
status = hfo.act(a); status = hfo.act(DASH, 20.0);
} }
// Check what the outcome of the episode was // Check what the outcome of the episode was
cout << "Episode " << episode << " ended with status: "; cout << "Episode " << episode << " ended with status: ";
......
...@@ -22,7 +22,7 @@ if __name__ == '__main__': ...@@ -22,7 +22,7 @@ if __name__ == '__main__':
# Grab the state features from the environment # Grab the state features from the environment
features = hfo.getState() features = hfo.getState()
# Take an action and get the current game status # Take an action and get the current game status
status = hfo.act((HFO_Actions.DASH, 0, 0)) status = hfo.act(HFO_Actions.DASH, 20.0, 0)
print 'Episode', episode, 'ended with', print 'Episode', episode, 'ended with',
# Check what the outcome of the episode was # Check what the outcome of the episode was
if status == HFO_Status.GOAL: if status == HFO_Status.GOAL:
......
...@@ -10,10 +10,9 @@ using namespace hfo; ...@@ -10,10 +10,9 @@ using namespace hfo;
// $./bin/HFO --offense-agents 1 // $./bin/HFO --offense-agents 1
// Returns a random high-level action // Returns a random high-level action
Action get_random_high_lv_action() { action_t get_random_high_lv_action() {
action_t action_indx = (action_t) ((rand() % 4) + 4); action_t action_indx = (action_t) ((rand() % 4) + MOVE);
Action act = {action_indx, 0, 0}; return action_indx;
return act;
} }
int main(int argc, char** argv) { int main(int argc, char** argv) {
...@@ -32,10 +31,9 @@ int main(int argc, char** argv) { ...@@ -32,10 +31,9 @@ int main(int argc, char** argv) {
while (status == IN_GAME) { while (status == IN_GAME) {
// Get the vector of state features for the current state // Get the vector of state features for the current state
const vector<float>& feature_vec = hfo.getState(); const vector<float>& feature_vec = hfo.getState();
// Create a dash action // Perform the action and recieve the current game status
Action a = get_random_high_lv_action(); status = hfo.act(get_random_high_lv_action());
// Perform the dash and recieve the current game status
status = hfo.act(a);
} }
} }
hfo.act(QUIT);
}; };
...@@ -9,10 +9,12 @@ using namespace hfo; ...@@ -9,10 +9,12 @@ using namespace hfo;
// Before running this program, first Start HFO server: // Before running this program, first Start HFO server:
// $./bin/HFO --offense-agents 1 // $./bin/HFO --offense-agents 1
float arg1, arg2;
// Returns a random low-level action // Returns a random low-level action
Action get_random_low_lv_action() { action_t get_random_low_lv_action() {
action_t action_indx = (action_t) (rand() % 4); action_t action_indx = (action_t) ((rand() % 4) + DASH);
float arg1, arg2; std::cout << action_indx << std::endl;
switch (action_indx) { switch (action_indx) {
case DASH: case DASH:
arg1 = (rand() / float(RAND_MAX)) * 200 - 100; // power: [-100, 100] arg1 = (rand() / float(RAND_MAX)) * 200 - 100; // power: [-100, 100]
...@@ -34,8 +36,7 @@ Action get_random_low_lv_action() { ...@@ -34,8 +36,7 @@ Action get_random_low_lv_action() {
cout << "Invalid Action Index: " << action_indx; cout << "Invalid Action Index: " << action_indx;
break; break;
} }
Action act = {action_indx, arg1, arg2}; return action_indx;
return act;
} }
int main(int argc, char** argv) { int main(int argc, char** argv) {
...@@ -54,10 +55,8 @@ int main(int argc, char** argv) { ...@@ -54,10 +55,8 @@ int main(int argc, char** argv) {
while (status == IN_GAME) { while (status == IN_GAME) {
// Get the vector of state features for the current state // Get the vector of state features for the current state
const vector<float>& feature_vec = hfo.getState(); const vector<float>& feature_vec = hfo.getState();
// Create a dash action // Perform the action and recieve the current game status
Action a = get_random_low_lv_action(); status = hfo.act(get_random_low_lv_action(), arg1, arg2);
// Perform the dash and recieve the current game status
status = hfo.act(a);
} }
} }
}; };
#include <iostream>
#include <vector>
#include <HFO.hpp>
#include <cstdlib>
#include <math.h>
using namespace std;
using namespace hfo;
// This agent demonstrates the use of the DRIBBLE_TO action. Before
// running this program, first start the HFO server:
// $./bin/HFO --offense-agents 1
#define PI 3.14159265
// Entry point of the DRIBBLE_TO example agent.
//
// Connects to the agent server and, on every step, issues a DRIBBLE_TO
// action whose target point moves along the unit circle centered on the
// origin of the normalized field coordinates.
//
// argv[1] (optional): port of the agent server; defaults to 6000.
int main(int argc, char** argv) {
  int port = 6000;
  if (argc > 1) {
    port = atoi(argv[1]);
  }
  // Create the HFO environment
  HFOEnvironment hfo;
  // Connect to the agent's server on the selected port (6000 unless
  // overridden) and request the high-level feature set. See manual for
  // more information on feature sets.
  hfo.connectToAgentServer(port, HIGH_LEVEL_FEATURE_SET);
  for (int episode = 0; ; episode++) {
    status_t status = IN_GAME;
    // Angle of the current target on the circle, in degrees. Kept in
    // [0, 360) so it can never overflow during a long-running episode.
    int angle_deg = 0;
    while (status == IN_GAME) {
      // Fetch the state features for the current step. This agent does
      // not use them; the call is kept because the original issued it
      // before every action (presumably required to stay in step with
      // the agent server -- confirm against HFOEnvironment).
      hfo.getState();
      // Dribble in a circle around center field
      float target_x = sin(angle_deg * PI / 180);
      float target_y = cos(angle_deg * PI / 180);
      // Perform the action and receive the current game status
      status = hfo.act(DRIBBLE_TO, target_x, target_y);
      angle_deg = (angle_deg + 2) % 360;
    }
  }
  // NOTE(review): the episode loop above has no exit condition, so this
  // QUIT is not reached by normal control flow.
  hfo.act(QUIT);
}
#include <iostream>
#include <vector>
#include <HFO.hpp>
#include <cstdlib>
#include <math.h>
using namespace std;
using namespace hfo;
// This agent demonstrates the use of the KICK_TO action. Before
// running this program, first start the HFO server:
// $./bin/HFO --offense-agents 1
// Entry point of the KICK_TO example agent.
//
// When the agent can kick, it kicks the ball toward the center of the
// field with a speed proportional to its own distance from the center;
// once it is (almost) at the center it takes a full-power kick toward
// the point (1, 0). When it cannot kick, it intercepts the ball.
//
// argv[1] (optional): port of the agent server; defaults to 6000.
int main(int argc, char** argv) {
  int port = 6000;
  if (argc > 1) {
    port = atoi(argv[1]);
  }
  // Create the HFO environment
  HFOEnvironment hfo;
  // Connect to the agent's server on the selected port (6000 unless
  // overridden) and request the high-level feature set. See manual for
  // more information on feature sets.
  hfo.connectToAgentServer(port, HIGH_LEVEL_FEATURE_SET);
  for (int episode = 0; ; episode++) {
    status_t status = IN_GAME;
    while (status == IN_GAME) {
      // Get the vector of state features for the current state
      const vector<float>& feature_vec = hfo.getState();
      // High-level features 0 and 1 are the agent's normalized x and y
      // position; feature 5 is the "able to kick" flag.
      float x = feature_vec[0];
      float y = feature_vec[1];
      // Distance from the field center, scaled so that a normalized
      // distance of 1 corresponds to the maximum kick speed of 3.
      float dist_to_target = sqrt(x*x + y*y) * 3;
      bool able_to_kick = feature_vec[5] > 0;
      // Perform the action and receive the current game status
      if (able_to_kick) {
        // Valid kick speed varies in the range [0, 3]
        if (dist_to_target < .1) {
          // Max power kick to goal
          status = hfo.act(KICK_TO, 1., 0., 3.0);
        } else {
          // Kick to center of hfo field. Away from the axes the scaled
          // distance can exceed 3 (up to 3*sqrt(2) in a corner), so
          // clamp it to the valid speed range before sending.
          float kick_speed = dist_to_target;
          if (kick_speed > 3.0f) {
            kick_speed = 3.0f;
          }
          status = hfo.act(KICK_TO, 0., 0., kick_speed);
        }
      } else {
        status = hfo.act(INTERCEPT);
      }
    }
  }
  // NOTE(review): the episode loop above has no exit condition, so this
  // QUIT is not reached by normal control flow.
  hfo.act(QUIT);
}
#include <iostream>
#include <vector>
#include <HFO.hpp>
#include <cstdlib>
using namespace std;
using namespace hfo;
// This agent demonstrates the use of the MOVE_TO action to visit the
// corners of the play field. Before running this program, first start
// the HFO server: $./bin/HFO --offense-agents 1
// Entry point of the MOVE_TO example agent.
//
// Each episode the agent moves toward one corner of the normalized
// play field; the corner changes between episodes by flipping the sign
// of one target coordinate at a time.
//
// argv[1] (optional): port of the agent server; defaults to 6000.
int main(int argc, char** argv) {
  const int port = (argc > 1) ? atoi(argv[1]) : 6000;

  // Set up the environment and ask the agent server for the high-level
  // feature set. See manual for more information on feature sets.
  HFOEnvironment hfo;
  hfo.connectToAgentServer(port, HIGH_LEVEL_FEATURE_SET);

  float target_x = 1.0;
  float target_y = 1.0;
  for (int episode = 0; ; ++episode) {
    // Even episodes mirror the target vertically, odd episodes mirror
    // it horizontally, so successive episodes walk around the corners.
    if (episode % 2 == 0) {
      target_y = -target_y;
    } else {
      target_x = -target_x;
    }
    std::cout << "target (x,y) = " << target_x << ", " << target_y << std::endl;

    status_t status = IN_GAME;
    do {
      // Fetch the state features for the current step (unused here)
      hfo.getState();
      // Issue the move and receive the resulting game status
      status = hfo.act(MOVE_TO, target_x, target_y);
    } while (status == IN_GAME);
  }
  hfo.act(QUIT);
}
...@@ -14,14 +14,20 @@ class HFO_Actions: ...@@ -14,14 +14,20 @@ class HFO_Actions:
[Low-Level] Turn(direction) [Low-Level] Turn(direction)
[Low-Level] Tackle(direction) [Low-Level] Tackle(direction)
[Low-Level] Kick(power, direction) [Low-Level] Kick(power, direction)
[Mid-Level] Kick_To(target_x, target_y, speed)
[Mid-Level] Move_To(target_x, target_y)
[Mid-Level] Dribble_To(target_x, target_y)
[Mid-Level] Intercept(): Intercept the ball
[High-Level] Move(): Reposition player according to strategy [High-Level] Move(): Reposition player according to strategy
[High-Level] Shoot(): Shoot the ball [High-Level] Shoot(): Shoot the ball
[High-Level] Pass(): Pass to the most open teammate [High-Level] Pass(teammate_unum): Pass to teammate
[High-Level] Dribble(): Offensive dribble [High-Level] Dribble(): Offensive dribble
QUIT NOOP(): Do Nothing
QUIT(): Quit the game
''' '''
DASH, TURN, TACKLE, KICK, MOVE, SHOOT, PASS, DRIBBLE, QUIT = range(9) DASH, TURN, TACKLE, KICK, KICK_TO, MOVE_TO, DRIBBLE_TO, INTERCEPT, \
MOVE, SHOOT, PASS, DRIBBLE, NOOP, QUIT = range(14)
class HFO_Status: class HFO_Status:
''' Current status of the HFO game. ''' ''' Current status of the HFO game. '''
...@@ -38,6 +44,24 @@ class HFOEnvironment(object): ...@@ -38,6 +44,24 @@ class HFOEnvironment(object):
self.numFeatures = None # Given by the server in handshake self.numFeatures = None # Given by the server in handshake
self.features = None # The state features self.features = None # The state features
def NumParams(self, action_type):
  ''' Returns the number of parameters that must accompany action_type,
  or -1 if action_type is not a known HFO_Actions value. '''
  # Actions grouped by how many parameters they take.
  arity_groups = (
    (0, (HFO_Actions.INTERCEPT, HFO_Actions.MOVE, HFO_Actions.SHOOT,
         HFO_Actions.DRIBBLE, HFO_Actions.NOOP, HFO_Actions.QUIT)),
    (1, (HFO_Actions.TURN, HFO_Actions.TACKLE, HFO_Actions.PASS)),
    (2, (HFO_Actions.DASH, HFO_Actions.KICK, HFO_Actions.MOVE_TO,
         HFO_Actions.DRIBBLE_TO)),
    (3, (HFO_Actions.KICK_TO,)),
  )
  # Flatten the groups into an action -> parameter-count lookup table.
  param_counts = {}
  for n_params, actions in arity_groups:
    for action in actions:
      param_counts[action] = n_params
  return param_counts.get(action_type, -1)
def connectToAgentServer(self, server_port=6000, def connectToAgentServer(self, server_port=6000,
feature_set=HFO_Features.HIGH_LEVEL_FEATURE_SET): feature_set=HFO_Features.HIGH_LEVEL_FEATURE_SET):
'''Connect to the server that controls the agent on the specified port. ''' '''Connect to the server that controls the agent on the specified port. '''
...@@ -92,9 +116,14 @@ class HFOEnvironment(object): ...@@ -92,9 +116,14 @@ class HFOEnvironment(object):
size numFeatures. ''' size numFeatures. '''
return self.features return self.features
def act(self, action): def act(self, *args):
''' Send an action and recieve the game status.''' ''' Send an action and recieve the game status.'''
self.socket.send(struct.pack("iff", *action)) assert len(args) > 0, 'Not enough arguments provided to act'
action_type = args[0]
n_params = self.NumParams(action_type)
assert n_params == len(args) - 1, 'Incorrect number of params to act: '\
'Required %d provided %d'%(n_params, len(args)-1)
self.socket.send(struct.pack('i'+'f'*n_params, *args))
# Get the current game status # Get the current game status
data = self.socket.recv(struct.calcsize("i")) data = self.socket.recv(struct.calcsize("i"))
status = struct.unpack("i", data)[0] status = struct.unpack("i", data)[0]
......
...@@ -3,7 +3,7 @@ import setuptools ...@@ -3,7 +3,7 @@ import setuptools
setuptools.setup( setuptools.setup(
name='hfo', name='hfo',
version='0.1.1', version='0.1.2',
packages=setuptools.find_packages(), packages=setuptools.find_packages(),
author='Matthew Hausknecht', author='Matthew Hausknecht',
author_email='matthew.hausknecht@gmail.com', author_email='matthew.hausknecht@gmail.com',
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include <netdb.h> #include <netdb.h>
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include <stdarg.h>
using namespace hfo; using namespace hfo;
...@@ -47,6 +48,41 @@ std::string HFOEnvironment::ActionToString(Action action) { ...@@ -47,6 +48,41 @@ std::string HFOEnvironment::ActionToString(Action action) {
return ss.str(); return ss.str();
}; };
// Returns the number of parameters that accompany the given action.
// If the action matches none of the known action_t values, a message
// is written to std::cerr and -1 is returned.
int HFOEnvironment::NumParams(action_t action) {
  switch (action) {
    // Actions that take no parameters
    case INTERCEPT:
    case MOVE:
    case SHOOT:
    case DRIBBLE:
    case NOOP:
    case QUIT:
      return 0;
    // Actions that take one parameter
    case TURN:
    case TACKLE:
    case PASS:
      return 1;
    // Actions that take two parameters
    case DASH:
    case KICK:
    case MOVE_TO:
    case DRIBBLE_TO:
      return 2;
    // Actions that take three parameters
    case KICK_TO:
      return 3;
  }
  // Reached only when the value is outside the enumerators handled above.
  std::cerr << "Unrecognized Action: " << action;
  return -1;
}
bool HFOEnvironment::ParseConfig(const std::string& message, Config& config) { bool HFOEnvironment::ParseConfig(const std::string& message, Config& config) {
config.num_offense = -1; config.num_offense = -1;
config.num_defense = -1; config.num_defense = -1;
...@@ -203,14 +239,31 @@ const std::vector<float>& HFOEnvironment::getState() { ...@@ -203,14 +239,31 @@ const std::vector<float>& HFOEnvironment::getState() {
return feature_vec; return feature_vec;
} }
status_t HFOEnvironment::act(Action action) { status_t HFOEnvironment::act(action_t action, ...) {
status_t game_status; status_t game_status;
// Send the action // Send the action_type
if (send(sockfd, &action, sizeof(Action), 0) < 0) { if (send(sockfd, &action, sizeof(action_t), 0) < 0) {
perror("[Agent Client] ERROR sending from socket"); perror("[Agent Client] ERROR sending from socket");
close(sockfd); close(sockfd);
exit(1); exit(1);
} }
// Send the arguments
int n_args = NumParams(action);
if (n_args > 0) {
float params[n_args];
va_list vl;
va_start(vl, action);
for (int i = 0; i < n_args; ++i) {
params[i] = va_arg(vl, double);
}
va_end(vl);
// Send the arguments
if (send(sockfd, &params, sizeof(float) * n_args, 0) < 0) {
perror("[Agent Client] ERROR sending from socket");
close(sockfd);
exit(1);
}
}
// Get the game status // Get the game status
if (recv(sockfd, &game_status, sizeof(status_t), 0) < 0) { if (recv(sockfd, &game_status, sizeof(status_t), 0) < 0) {
perror("[Agent Client] ERROR recieving from socket"); perror("[Agent Client] ERROR recieving from socket");
......
...@@ -17,14 +17,19 @@ enum feature_set_t ...@@ -17,14 +17,19 @@ enum feature_set_t
// The actions available to the agent // The actions available to the agent
enum action_t enum action_t
{ {
DASH, // [Low-Level] Dash(power, relative_direction) DASH, // [Low-Level] Dash(power [0,100], direction [-180,180])
TURN, // [Low-Level] Turn(direction) TURN, // [Low-Level] Turn(direction [-180,180])
TACKLE, // [Low-Level] Tackle(direction) TACKLE, // [Low-Level] Tackle(direction [-180,180])
KICK, // [Low-Level] Kick(power, direction) KICK, // [Low-Level] Kick(power [0,100], direction [-180,180])
KICK_TO, // [Mid-Level] Kick_To(target_x [-1,1], target_y [-1,1], speed [0,3])
MOVE_TO, // [Mid-Level] Move(target_x [-1,1], target_y [-1,1])
DRIBBLE_TO, // [Mid-Level] Dribble(target_x [-1,1], target_y [-1,1])
INTERCEPT, // [Mid-Level] Intercept(): Intercept the ball
MOVE, // [High-Level] Move(): Reposition player according to strategy MOVE, // [High-Level] Move(): Reposition player according to strategy
SHOOT, // [High-Level] Shoot(): Shoot the ball SHOOT, // [High-Level] Shoot(): Shoot the ball
PASS, // [High-Level] Pass(teammate_unum): Pass to the most open teammate PASS, // [High-Level] Pass(teammate_unum [0,11]): Pass to the most open teammate
DRIBBLE, // [High-Level] Dribble(): Offensive dribble DRIBBLE, // [High-Level] Dribble(): Offensive dribble
NOOP, // Do nothing
QUIT // Special action to quit the game QUIT // Special action to quit the game
}; };
...@@ -65,6 +70,9 @@ class HFOEnvironment { ...@@ -65,6 +70,9 @@ class HFOEnvironment {
// Returns a string representation of an action. // Returns a string representation of an action.
static std::string ActionToString(Action action); static std::string ActionToString(Action action);
// Get the number of parameters needed for a action.
static int NumParams(action_t action);
// Parse a Trainer message to populate config. Returns a bool // Parse a Trainer message to populate config. Returns a bool
// indicating if the struct was correctly parsed. // indicating if the struct was correctly parsed.
static bool ParseConfig(const std::string& message, Config& config); static bool ParseConfig(const std::string& message, Config& config);
...@@ -77,7 +85,7 @@ class HFOEnvironment { ...@@ -77,7 +85,7 @@ class HFOEnvironment {
const std::vector<float>& getState(); const std::vector<float>& getState();
// Take an action and receive the resulting game status // Take an action and receive the resulting game status
status_t act(Action action); status_t act(action_t action, ...);
protected: protected:
int numFeatures; // The number of features in this domain int numFeatures; // The number of features in this domain
......
...@@ -425,38 +425,66 @@ void Agent::actionImpl() { ...@@ -425,38 +425,66 @@ void Agent::actionImpl() {
exit(1); exit(1);
} }
// Get the action // Get the action type
Action action; action_t action;
if (recv(newsockfd, &action, sizeof(Action), 0) < 0) { if (recv(newsockfd, &action, sizeof(action_t), 0) < 0) {
perror("[Agent Server] ERROR recv from socket"); perror("[Agent Server] ERROR recv from socket");
close(sockfd); close(sockfd);
exit(1); exit(1);
} }
if (action.action == SHOOT) { // Get the parameters for that action
int n_args = HFOEnvironment::NumParams(action);
float params[n_args];
if (n_args > 0) {
if (recv(newsockfd, &params, sizeof(float)*n_args, 0) < 0) {
perror("[Agent Server] ERROR recv from socket");
close(sockfd);
exit(1);
}
}
if (action == SHOOT) {
const ShootGenerator::Container & cont = const ShootGenerator::Container & cont =
ShootGenerator::instance().courses(this->world(), false); ShootGenerator::instance().courses(this->world(), false);
ShootGenerator::Container::const_iterator best_shoot ShootGenerator::Container::const_iterator best_shoot
= std::min_element(cont.begin(), cont.end(), ShootGenerator::ScoreCmp()); = std::min_element(cont.begin(), cont.end(), ShootGenerator::ScoreCmp());
Body_SmartKick(best_shoot->target_point_, best_shoot->first_ball_speed_, Body_SmartKick(best_shoot->target_point_, best_shoot->first_ball_speed_,
best_shoot->first_ball_speed_ * 0.99, 3).execute(this); best_shoot->first_ball_speed_ * 0.99, 3).execute(this);
} else if (action.action == PASS) { } else if (action == PASS) {
Force_Pass pass; Force_Pass pass;
int receiver = int(action.arg1); int receiver = int(params[0]);
pass.get_pass_to_player(this->world(), receiver); pass.get_pass_to_player(this->world(), receiver);
pass.execute(this); pass.execute(this);
} }
switch(action.action) { switch(action) {
case DASH: case DASH:
this->doDash(action.arg1, action.arg2); this->doDash(params[0], params[1]);
break; break;
case TURN: case TURN:
this->doTurn(action.arg1); this->doTurn(params[0]);
break; break;
case TACKLE: case TACKLE:
this->doTackle(action.arg1, false); this->doTackle(params[0], false);
break; break;
case KICK: case KICK:
this->doKick(action.arg1, action.arg2); this->doKick(params[0], params[1]);
break;
case KICK_TO:
Body_SmartKick(Vector2D(feature_extractor->absoluteXPos(params[0]),
feature_extractor->absoluteYPos(params[1])),
params[2], params[2] * 0.99, 3).execute(this);
break;
case MOVE_TO:
Body_GoToPoint(Vector2D(feature_extractor->absoluteXPos(params[0]),
feature_extractor->absoluteYPos(params[1])), 0.25,
ServerParam::i().maxDashPower()).execute(this);
break;
case DRIBBLE_TO:
Body_Dribble(Vector2D(feature_extractor->absoluteXPos(params[0]),
feature_extractor->absoluteYPos(params[1])), 1.0,
ServerParam::i().maxDashPower(), 2).execute(this);
break;
case INTERCEPT:
Body_Intercept().execute(this);
break; break;
case MOVE: case MOVE:
this->doMove(); this->doMove();
...@@ -468,13 +496,15 @@ void Agent::actionImpl() { ...@@ -468,13 +496,15 @@ void Agent::actionImpl() {
case DRIBBLE: case DRIBBLE:
this->doDribble(); this->doDribble();
break; break;
case NOOP:
break;
case QUIT: case QUIT:
std::cout << "[Agent Server] Got quit from agent." << std::endl; std::cout << "[Agent Server] Got quit from agent." << std::endl;
close(sockfd); close(sockfd);
exit(0); exit(0);
default: default:
std::cerr << "[Agent Server] ERROR Unsupported Action: " std::cerr << "[Agent Server] ERROR Unsupported Action: "
<< action.action << std::endl; << action << std::endl;
close(sockfd); close(sockfd);
exit(1); exit(1);
} }
......
...@@ -9,8 +9,13 @@ ...@@ -9,8 +9,13 @@
using namespace rcsc; using namespace rcsc;
FeatureExtractor::FeatureExtractor() : FeatureExtractor::FeatureExtractor(int num_teammates,
numFeatures(-1) int num_opponents,
bool playing_offense) :
numFeatures(-1),
numTeammates(num_teammates),
numOpponents(num_opponents),
playingOffense(playing_offense)
{ {
const ServerParam& SP = ServerParam::i(); const ServerParam& SP = ServerParam::i();
...@@ -92,17 +97,35 @@ void FeatureExtractor::addFeature(float val) { ...@@ -92,17 +97,35 @@ void FeatureExtractor::addFeature(float val) {
feature_vec[featIndx++] = val; feature_vec[featIndx++] = val;
} }
void FeatureExtractor::addNormFeature(float val, float min_val, float max_val) { float FeatureExtractor::normalize(float val, float min_val, float max_val) {
assert(featIndx < numFeatures);
if (val < min_val || val > max_val) { if (val < min_val || val > max_val) {
std::cout << "Feature " << featIndx << " Violated Feature Bounds: " << val std::cout << "Feature " << featIndx << " Violated Feature Bounds: " << val
<< " Expected min/max: [" << min_val << ", " << max_val << "]" << std::endl; << " Expected min/max: [" << min_val << ", "
<< max_val << "]" << std::endl;
val = std::min(std::max(val, min_val), max_val); val = std::min(std::max(val, min_val), max_val);
} }
feature_vec[featIndx++] = ((val - min_val) / (max_val - min_val)) return ((val - min_val) / (max_val - min_val))
* (FEAT_MAX - FEAT_MIN) + FEAT_MIN; * (FEAT_MAX - FEAT_MIN) + FEAT_MIN;
} }
// Converts a value from the normalized feature range [FEAT_MIN, FEAT_MAX]
// back onto the interval [min_val, max_val]. A value outside the normalized
// range is reported on stdout and clamped before the conversion.
float FeatureExtractor::unnormalize(float val, float min_val, float max_val) {
  const bool out_of_range = (val < FEAT_MIN) || (val > FEAT_MAX);
  if (out_of_range) {
    std::cout << "Unnormalized value Violated Feature Bounds: " << val
              << " Expected min/max: [" << FEAT_MIN << ", "
              << FEAT_MAX << "]" << std::endl;
    // Clamp against local copies of the bounds: the original author noted
    // that using FEAT_MIN/FEAT_MAX here directly gave a linker error on OSX.
    float lower_bound = FEAT_MIN;
    float upper_bound = FEAT_MAX;
    val = std::min(std::max(val, lower_bound), upper_bound);
  }
  // Rescale: fraction of the normalized range times the width of the target
  // interval, shifted by the target minimum.
  const float target_span = max_val - min_val;
  return ((val - FEAT_MIN) / (FEAT_MAX - FEAT_MIN)) * target_span + min_val;
}
void FeatureExtractor::addNormFeature(float val, float min_val, float max_val) {
assert(featIndx < numFeatures);
feature_vec[featIndx++] = normalize(val, min_val, max_val);
}
void FeatureExtractor::checkFeatures() { void FeatureExtractor::checkFeatures() {
assert(feature_vec.size() == numFeatures); assert(feature_vec.size() == numFeatures);
for (int i=0; i<numFeatures; ++i) { for (int i=0; i<numFeatures; ++i) {
...@@ -241,3 +264,33 @@ void FeatureExtractor::splitAngles(std::vector<OpenAngle> &openAngles, ...@@ -241,3 +264,33 @@ void FeatureExtractor::splitAngles(std::vector<OpenAngle> &openAngles,
} }
openAngles = resAngles; openAngles = resAngles;
} }
// Normalizes an absolute x coordinate. The expected range depends on the
// side being played, padded by 10% of pitchHalfLength:
//   offense: [-0.1 * pitchHalfLength, 1.1 * pitchHalfLength]
//   defense: [-1.1 * pitchHalfLength, 0.1 * pitchHalfLength]
float FeatureExtractor::normalizedXPos(float absolute_x_pos) {
  const float margin = .1 * pitchHalfLength;
  const float lower = playingOffense ? -margin : -pitchHalfLength - margin;
  const float upper = playingOffense ? pitchHalfLength + margin : margin;
  return normalize(absolute_x_pos, lower, upper);
}
// Normalizes an absolute y coordinate over the pitch width, padded on each
// side by 10% of pitchHalfWidth.
float FeatureExtractor::normalizedYPos(float absolute_y_pos) {
  const float margin = .1 * pitchHalfWidth;
  const float lower = -pitchHalfWidth - margin;
  const float upper = pitchHalfWidth + margin;
  return normalize(absolute_y_pos, lower, upper);
}
// Inverse of normalizedXPos: converts a normalized x coordinate back to an
// absolute one, using the same side-dependent range:
//   offense: [-0.1 * pitchHalfLength, 1.1 * pitchHalfLength]
//   defense: [-1.1 * pitchHalfLength, 0.1 * pitchHalfLength]
float FeatureExtractor::absoluteXPos(float normalized_x_pos) {
  const float margin = .1 * pitchHalfLength;
  const float lower = playingOffense ? -margin : -pitchHalfLength - margin;
  const float upper = playingOffense ? pitchHalfLength + margin : margin;
  return unnormalize(normalized_x_pos, lower, upper);
}
// Inverse of normalizedYPos: converts a normalized y coordinate back to an
// absolute one over the pitch width padded by 10% of pitchHalfWidth per side.
float FeatureExtractor::absoluteYPos(float normalized_y_pos) {
  const float margin = .1 * pitchHalfWidth;
  const float lower = -pitchHalfWidth - margin;
  const float upper = pitchHalfWidth + margin;
  return unnormalize(normalized_y_pos, lower, upper);
}
...@@ -8,7 +8,7 @@ typedef std::pair<float, float> OpenAngle; ...@@ -8,7 +8,7 @@ typedef std::pair<float, float> OpenAngle;
class FeatureExtractor { class FeatureExtractor {
public: public:
FeatureExtractor(); FeatureExtractor(int num_teammates, int num_opponents, bool playing_offense);
virtual ~FeatureExtractor(); virtual ~FeatureExtractor();
// Updated the state features stored in feature_vec // Updated the state features stored in feature_vec
...@@ -68,6 +68,12 @@ public: ...@@ -68,6 +68,12 @@ public:
float oppAngleBottom, float oppAngleBottom,
float oppAngleTop); float oppAngleTop);
// Convert back and forth between normalized and absolute x,y positions
float normalizedXPos(float absolute_x_pos);
float normalizedYPos(float absolute_y_pos);
float absoluteXPos(float normalized_x_pos);
float absoluteYPos(float normalized_y_pos);
protected: protected:
// Encodes an angle feature as the sin and cosine of that angle, // Encodes an angle feature as the sin and cosine of that angle,
// effectively transforming a single angle into two features. // effectively transforming a single angle into two features.
...@@ -92,6 +98,11 @@ protected: ...@@ -92,6 +98,11 @@ protected:
// Add a feature without normalizing // Add a feature without normalizing
void addFeature(float val); void addFeature(float val);
// Returns a normalized feature value
float normalize(float val, float min_val, float max_val);
// Converts a normalized feature value back into original space
float unnormalize(float val, float min_val, float max_val);
// Add a feature and normalize to the range [FEAT_MIN, FEAT_MAX] // Add a feature and normalize to the range [FEAT_MIN, FEAT_MAX]
void addNormFeature(float val, float min_val, float max_val); void addNormFeature(float val, float min_val, float max_val);
...@@ -118,6 +129,9 @@ protected: ...@@ -118,6 +129,9 @@ protected:
// Useful measures defined by the Server Parameters // Useful measures defined by the Server Parameters
float pitchLength, pitchWidth, pitchHalfLength, pitchHalfWidth, float pitchLength, pitchWidth, pitchHalfLength, pitchHalfWidth,
goalHalfWidth, penaltyAreaLength, penaltyAreaWidth; goalHalfWidth, penaltyAreaLength, penaltyAreaWidth;
int numTeammates; // Number of teammates in HFO
int numOpponents; // Number of opponents in HFO
bool playingOffense; // Are we playing offense or defense?
}; };
#endif // FEATURE_EXTRACTOR_H #endif // FEATURE_EXTRACTOR_H
...@@ -10,10 +10,7 @@ using namespace rcsc; ...@@ -10,10 +10,7 @@ using namespace rcsc;
HighLevelFeatureExtractor::HighLevelFeatureExtractor(int num_teammates, HighLevelFeatureExtractor::HighLevelFeatureExtractor(int num_teammates,
int num_opponents, int num_opponents,
bool playing_offense) : bool playing_offense) :
FeatureExtractor(), FeatureExtractor(num_teammates, num_opponents, playing_offense)
numTeammates(num_teammates),
numOpponents(num_opponents),
playingOffense(playing_offense)
{ {
assert(numTeammates >= 0); assert(numTeammates >= 0);
assert(numOpponents >= 0); assert(numOpponents >= 0);
......
...@@ -25,9 +25,6 @@ protected: ...@@ -25,9 +25,6 @@ protected:
const static int num_basic_features = 9; const static int num_basic_features = 9;
// Number of features for each player or opponent in game. // Number of features for each player or opponent in game.
const static int features_per_teammate = 5; const static int features_per_teammate = 5;
int numTeammates; // Number of teammates in HFO
int numOpponents; // Number of opponents in HFO
bool playingOffense; // Are we playing offense or defense?
}; };
#endif // HIGHLEVEL_FEATURE_EXTRACTOR_H #endif // HIGHLEVEL_FEATURE_EXTRACTOR_H
...@@ -10,10 +10,7 @@ using namespace rcsc; ...@@ -10,10 +10,7 @@ using namespace rcsc;
LowLevelFeatureExtractor::LowLevelFeatureExtractor(int num_teammates, LowLevelFeatureExtractor::LowLevelFeatureExtractor(int num_teammates,
int num_opponents, int num_opponents,
bool playing_offense) : bool playing_offense) :
FeatureExtractor(), FeatureExtractor(num_teammates, num_opponents, playing_offense)
numTeammates(num_teammates),
numOpponents(num_opponents),
playingOffense(playing_offense)
{ {
assert(numTeammates >= 0); assert(numTeammates >= 0);
assert(numOpponents >= 0); assert(numOpponents >= 0);
......
...@@ -19,9 +19,6 @@ protected: ...@@ -19,9 +19,6 @@ protected:
const static int num_basic_features = 58; const static int num_basic_features = 58;
// Number of features for each player or opponent in game. // Number of features for each player or opponent in game.
const static int features_per_player = 8; const static int features_per_player = 8;
int numTeammates; // Number of teammates in HFO
int numOpponents; // Number of opponents in HFO
bool playingOffense; // Are we playing offense or defense?
}; };
#endif // LOWLEVEL_FEATURE_EXTRACTOR_H #endif // LOWLEVEL_FEATURE_EXTRACTOR_H
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment