Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
Seminar-HFO
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shashank Suhas
Seminar-HFO
Commits
08e20ef6
Commit
08e20ef6
authored
Oct 23, 2017
by
DurgeshSamant
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added python wrapper over sarsa libraries for offense agent
parent
952088ef
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
364 additions
and
6 deletions
+364
-6
example/sarsa_libraries/funcapprox/Makefile
example/sarsa_libraries/funcapprox/Makefile
+5
-5
example/sarsa_libraries/policy/Makefile
example/sarsa_libraries/policy/Makefile
+1
-1
example/sarsa_libraries/python_wrapper/FA_C_wrapper.cpp
example/sarsa_libraries/python_wrapper/FA_C_wrapper.cpp
+1
-0
example/sarsa_libraries/python_wrapper/FA_C_wrapper.h
example/sarsa_libraries/python_wrapper/FA_C_wrapper.h
+17
-0
example/sarsa_libraries/python_wrapper/Policy_C_wrapper.cpp
example/sarsa_libraries/python_wrapper/Policy_C_wrapper.cpp
+1
-0
example/sarsa_libraries/python_wrapper/Policy_C_wrapper.h
example/sarsa_libraries/python_wrapper/Policy_C_wrapper.h
+34
-0
example/sarsa_libraries/python_wrapper/py_wrapper.py
example/sarsa_libraries/python_wrapper/py_wrapper.py
+102
-0
example/sarsa_offense/high_level_sarsa_agent.py
example/sarsa_offense/high_level_sarsa_agent.py
+144
-0
example/simulate_python_sarsa_agents.sh
example/simulate_python_sarsa_agents.sh
+59
-0
No files found.
example/sarsa_libraries/funcapprox/Makefile
View file @
08e20ef6
#Flags
CXXFLAGS
=
-
g
-O3
-Wall
CXXFLAGS
=
-
shared
-g
-O3
-Wall
-fPIC
-lpython2
.7
#Compiler
CXX
=
g++
#Sources
SRCS
=
FuncApprox.cpp tiles2.cpp CMAC.cpp
SRCS
=
FuncApprox.cpp tiles2.cpp CMAC.cpp
#Objects
OBJS
=
$(SRCS:.cpp=.o)
...
...
@@ -18,11 +18,11 @@ TARGET = libfuncapprox.a
all
:
$(TARGET)
.cpp.o
:
$(CXX)
$(CXXFLAGS)
-c
-o
$@
$
(
@F:%.o
=
%.cpp
)
$(CXX)
$(CXXFLAGS)
-c
-o
$@
$
(
@F:%.o
=
%.cpp
)
$(TARGET)
:
$(OBJS)
ar cq
$@
$(OBJS)
ar cq
$@
$(OBJS)
;
#g++ -shared -o libFA.so libfuncapprox.a ;
clean
:
rm
-f
$(TARGET)
$(OBJS)
*
~
rm
-f
$(TARGET)
$(OBJS)
*
~
;
#rm *.so;
example/sarsa_libraries/policy/Makefile
View file @
08e20ef6
...
...
@@ -5,7 +5,7 @@ FA_DIR = ../funcapprox
INCLUDES
=
-I
$(FA_DIR)
#Flags
CXXFLAGS
=
-
g
-O3
-Wall
CXXFLAGS
=
-
shared
-g
-O3
-Wall
-fPIC
-lpython2
.7
#Compiler
CXX
=
g++
...
...
example/sarsa_libraries/python_wrapper/FA_C_wrapper.cpp
0 → 100644
View file @
08e20ef6
#include "FA_C_wrapper.h"
example/sarsa_libraries/python_wrapper/FA_C_wrapper.h
0 → 100644
View file @
08e20ef6
#ifndef __FA_C_WRAPPER_H__
#define __FA_C_WRAPPER_H__
#include "CMAC.h"
#include<iostream>
extern
"C"
{
void
*
CMAC_new
(
int
numF
,
int
numA
,
double
r
[],
double
m
[],
double
res
[])
{
// std::cout<<"FA_C_WRAPPER: CMAC_new"<<std::endl;
CMAC
*
ca
=
new
CMAC
(
numF
,
numA
,
r
,
m
,
res
);
void
*
ptr
=
reinterpret_cast
<
void
*>
(
ca
);
return
ptr
;
}
}
#endif
example/sarsa_libraries/python_wrapper/Policy_C_wrapper.cpp
0 → 100644
View file @
08e20ef6
#include "Policy_C_wrapper.h"
example/sarsa_libraries/python_wrapper/Policy_C_wrapper.h
0 → 100644
View file @
08e20ef6
#ifndef __POLICY_C_WRAPPER_H__
#define __POLICY_C_WRAPPER_H__
#include "SarsaAgent.h"
#include "FuncApprox.h"
#include "CMAC.h"
#include<iostream>
extern
"C"
{
void
*
SarsaAgent_new
(
int
numFeatures
,
int
numActions
,
double
learningRate
,
double
epsilon
,
double
lambda
,
void
*
FA
,
char
*
loadWeightsFile
,
char
*
saveWeightsFile
)
{
CMAC
*
fa
=
reinterpret_cast
<
CMAC
*>
(
FA
);
SarsaAgent
*
sa
=
new
SarsaAgent
(
numFeatures
,
numActions
,
learningRate
,
epsilon
,
lambda
,
fa
,
loadWeightsFile
,
saveWeightsFile
);
void
*
ptr
=
reinterpret_cast
<
void
*>
(
sa
);
return
ptr
;
}
void
SarsaAgent_update
(
void
*
ptr
,
double
state
[],
int
action
,
double
reward
,
double
discountFactor
)
{
SarsaAgent
*
p
=
reinterpret_cast
<
SarsaAgent
*>
(
ptr
);
p
->
update
(
state
,
action
,
reward
,
discountFactor
);
}
int
SarsaAgent_selectAction
(
void
*
ptr
,
double
state
[])
{
SarsaAgent
*
p
=
reinterpret_cast
<
SarsaAgent
*>
(
ptr
);
int
action
=
p
->
selectAction
(
state
);
return
action
;
}
void
SarsaAgent_endEpisode
(
void
*
ptr
)
{
SarsaAgent
*
p
=
reinterpret_cast
<
SarsaAgent
*>
(
ptr
);
p
->
endEpisode
();
}
}
#endif
example/sarsa_libraries/python_wrapper/py_wrapper.py
0 → 100644
View file @
08e20ef6
from
ctypes
import
*
import
numpy
as
np
import
getpass
import
sys
,
os
# True under a Python 3 interpreter. SarsaAgent.__init__ consults this to
# decide whether file-name strings must be encoded before being passed to C.
isPy3 = (sys.version_info[0] == 3)
username = getpass.getuser()

# The compiled wrapper library is loaded from the directory of this file.
libs = cdll.LoadLibrary(os.path.join(os.path.dirname(__file__), 'C_wrappers.so'))

# ctypes prototypes for the functions exported by the wrapper library.
# Handles are declared as c_void_p so they are not truncated to a C int.

# CMAC_new(int, int, double*, double*, double*) -> void*
libs.CMAC_new.restype = c_void_p
libs.CMAC_new.argtypes = [c_int, c_int,
                          POINTER(c_double), POINTER(c_double), POINTER(c_double)]

# SarsaAgent_new(int, int, double, double, double, void*, char*, char*) -> void*
libs.SarsaAgent_new.restype = c_void_p
libs.SarsaAgent_new.argtypes = [c_int, c_int,
                                c_double, c_double, c_double,
                                c_void_p, c_char_p, c_char_p]

# SarsaAgent_update(void*, double*, int, double, double) -> void
libs.SarsaAgent_update.restype = None
libs.SarsaAgent_update.argtypes = [c_void_p, POINTER(c_double), c_int, c_double, c_double]

# SarsaAgent_selectAction(void*, double*) -> int
libs.SarsaAgent_selectAction.restype = c_int
libs.SarsaAgent_selectAction.argtypes = [c_void_p, POINTER(c_double)]

# SarsaAgent_endEpisode(void*) -> void
libs.SarsaAgent_endEpisode.restype = None
libs.SarsaAgent_endEpisode.argtypes = [c_void_p]
class CMAC(object):
    """Python-side holder for a CMAC object created by the wrapper library.

    Attributes:
        obj: the value returned by ``libs.CMAC_new`` (declared as
            ``c_void_p``), used as an opaque handle to the native object.
    """

    def __init__(self, numF, numA, r, m, res):
        """Create the native CMAC.

        Args:
            numF: integer passed to the library as the first ``int`` argument.
            numA: integer passed to the library as the second ``int`` argument.
            r, m, res: sequences of numbers; each is copied into a fresh
                ctypes ``double`` array of the same length before the call.
        """
        c_r, c_m, c_res = [(c_double * len(seq))(*seq) for seq in (r, m, res)]
        self.obj = libs.CMAC_new(c_int(numF), c_int(numA), c_r, c_m, c_res)
class SarsaAgent(object):
    """Python-side holder for a SarsaAgent created by the wrapper library.

    Attributes:
        obj: the value returned by ``libs.SarsaAgent_new`` (declared as
            ``c_void_p``); every method passes it back to the library.
    """

    def __init__(self, numFeatures, numActions, learningRate, epsilon, Lambda,
                 FA, loadWeightsFile, saveWeightsFile):
        """Create the native agent.

        Args:
            numFeatures, numActions: integers, converted with ``c_int``.
            learningRate, epsilon, Lambda: numbers, converted with ``c_double``.
            FA: object whose ``obj`` attribute is the handle of the function
                approximator (e.g. a ``CMAC`` instance from this module).
            loadWeightsFile, saveWeightsFile: file-name strings. Under
                Python 3 they are UTF-8 encoded before being wrapped in
                ``c_char_p``; under Python 2 they are wrapped as they are.
        """
        leading_args = (
            c_int(numFeatures),
            c_int(numActions),
            c_double(learningRate),
            c_double(epsilon),
            c_double(Lambda),
            c_void_p(FA.obj),
        )
        if isPy3:
            # c_char_p needs bytes on Python 3
            load_arg = c_char_p(loadWeightsFile.encode('utf-8'))
            save_arg = c_char_p(saveWeightsFile.encode('utf-8'))
        else:
            # Python 2 str is already a byte string
            load_arg = c_char_p(loadWeightsFile)
            save_arg = c_char_p(saveWeightsFile)
        self.obj = libs.SarsaAgent_new(*(leading_args + (load_arg, save_arg)))

    def update(self, state, action, reward, discountFactor):
        """Forward one update to the native agent.

        ``state`` is copied into a ctypes ``double`` array of the same
        length; the scalars are converted with ``c_int`` / ``c_double``.
        """
        features = (c_double * len(state))(*state)
        act = c_int(action)
        rew = c_double(reward)
        gamma = c_double(discountFactor)
        libs.SarsaAgent_update(c_void_p(self.obj), features, act, rew, gamma)

    def selectAction(self, state):
        """Return the action chosen by the native agent for ``state``, as an int.

        ``state`` is copied into a ctypes ``double`` array of the same length.
        """
        features = (c_double * len(state))(*state)
        return int(libs.SarsaAgent_selectAction(c_void_p(self.obj), features))

    def endEpisode(self):
        """Forward an end-of-episode call to the native agent."""
        libs.SarsaAgent_endEpisode(c_void_p(self.obj))
#NF=8
#NA=2 #PASS to each teammate, SHOOT, DRIBBLE
#discFac=1.0
#resolution=0.1
#eps=0.01
#NOT=0
#Lambda=0.0
#learnR=0.1
#Min=[]
#Range=[]
#Res=[]
#for i in range(NF):
# Min.append(-1.0)
# Range.append(2.0)
# Res.append(resolution)
#
#FA=CMAC(NF, NA, Range, Min, Res)
#SA=SarsaAgent(NF, NA, learnR, eps, Lambda, FA.obj, "", "")
#SA.endEpisode()
example/sarsa_offense/high_level_sarsa_agent.py
0 → 100755
View file @
08e20ef6
#!/usr/bin/env python3
# encoding: utf-8
from
hfo
import
*
import
argparse
import
numpy
as
np
import
sys
,
os
sys
.
path
.
append
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
'..'
,
'sarsa_libraries'
,
'python_wrapper'
))
from
py_wrapper
import
*
NA
=
0
#Number of actions
NOT
=
0
#Number of teammates
NF
=
0
#Number of features
def getReward(s):
    """Translate an HFO status value into a reward.

    Returns 1 for GOAL, -1 for CAPTURED_BY_DEFENSE or OUT_OF_BOUNDS, and 0 for
    OUT_OF_TIME, IN_GAME and SERVER_DOWN. Any other value is reported on
    stdout and also yields 0.
    """
    if s == GOAL:
        return 1
    if s == CAPTURED_BY_DEFENSE or s == OUT_OF_BOUNDS:
        return -1
    # OUT_OF_TIME has no identifiable cause, so it is treated as neutral,
    # just like an ongoing game or a server that went away.
    if s == OUT_OF_TIME or s == IN_GAME or s == SERVER_DOWN:
        return 0
    print("Error: Unknown GameState", s)
    return 0
def purge_features(state):
    """Select the subset of ``state`` that is fed to the SARSA agent.

    Reads the module-level ``NF`` (length of the returned vector), ``NOT``
    (number of teammates) and ``args`` (parsed command line; only
    ``args.numOpponents`` is used). ``args`` is created in the ``__main__``
    section, so this function is only usable once the script has parsed its
    arguments.

    Args:
        state: indexable feature vector as returned by ``hfo.getState()``.

    Returns:
        numpy float64 array of length ``NF`` holding the retained entries of
        ``state`` in their original order.
    """
    st = np.empty(NF, dtype=np.float64)
    stateIndex = 0
    tmpIndex = 9 + 3 * NOT
    for i in range(len(state)):
        # Ignore the first six features, everything past index 9 + 6*NOT
        # (opponent features) and, when there is no opponent, feature 9 and
        # the teammate-proximity-to-opponent block (9+NOT, 9+2*NOT].
        # The original referenced an undefined name `numTMates` here, which
        # raised NameError whenever numOpponents == 0; the teammate count is
        # the module-level NOT.
        if (i < 6 or i > 9 + 6 * NOT or
                (args.numOpponents == 0 and
                 ((i > 9 + NOT and i <= 9 + 2 * NOT) or i == 9))):
            continue
        # Ignore angle and uniform number of teammates: past tmpIndex only
        # every third entry (offset 1 mod 3) is kept.
        temp = i - tmpIndex
        if temp > 0 and (temp % 3 == 2 or temp % 3 == 0):
            continue
        st[stateIndex] = state[i]
        stateIndex += 1
    return st
if __name__ == '__main__':
    # Command-line options of the agent process.
    parser = argparse.ArgumentParser()
    parser.add_argument('--port', type=int, default=6000)
    parser.add_argument('--numTeammates', type=int, default=0)
    parser.add_argument('--numOpponents', type=int, default=1)
    parser.add_argument('--numEpisodes', type=int, default=1)
    parser.add_argument('--learnRate', type=float, default=0.1)
    parser.add_argument('--suffix', type=int, default=0)
    args = parser.parse_args()

    # Create the HFO Environment
    hfo = HFOEnvironment()
    # now connect to the server
    hfo.connectToServer(HIGH_LEVEL_FEATURE_SET,
                        'bin/teams/base/config/formations-dt',
                        args.port, 'localhost', 'base_left', False)

    # NF, NA and NOT are rebound here at module scope, so getReward /
    # purge_features see the new values. No `global` statement is needed at
    # module level, and declaring one after the names were already assigned
    # is a SyntaxError on Python 3.6+.
    if args.numOpponents > 0:
        NF = 4 + 4 * args.numTeammates
    else:
        NF = 3 + 3 * args.numTeammates
    NOT = args.numTeammates
    NA = NOT + 2  # PASS to each teammate, SHOOT, DRIBBLE
    learnR = args.learnRate

    # CMAC parameters
    resolution = 0.1
    Range = [2] * NF
    Min = [-1] * NF
    Res = [resolution] * NF

    # Sarsa Agent Parameters
    wt_filename = "weights_" + str(NOT + 1) + "v" + str(args.numOpponents) + '_' + str(args.suffix)
    discFac = 1
    Lambda = 0
    eps = 0.01

    # initialize the function approximator and the sarsa agent
    # (the same file name is used for loading and saving weights)
    FA = CMAC(NF, NA, Range, Min, Res)
    SA = SarsaAgent(NF, NA, learnR, eps, Lambda, FA, wt_filename, wt_filename)

    # episode rollouts
    st = np.empty(NF, dtype=np.float64)
    action = -1
    reward = 0
    for episode in range(1, args.numEpisodes + 1):
        count = 0
        status = IN_GAME
        action = -1  # -1 means "no learned action taken yet in this episode"
        while status == IN_GAME:
            count = count + 1
            # Grab the state features from the environment
            state = hfo.getState()
            if int(state[5]) == 1:
                # Learn from the previous action before choosing a new one.
                if action != -1:
                    reward = getReward(status)
                    SA.update(st, action, reward, discFac)
                st = purge_features(state)
                # take an action
                action = SA.selectAction(st)
                if action == 0:
                    hfo.act(SHOOT)
                elif action == 1:
                    hfo.act(DRIBBLE)
                else:
                    # Actions >= 2 are passes; the pass target is read from
                    # the raw state, stepping back 3 entries per action index.
                    hfo.act(PASS, state[(9 + 6 * NOT) - (action - 2) * 3])
            else:
                hfo.act(MOVE)
            status = hfo.step()
        # --------------- end of while loop ---------------

        # Episode ended: apply the final reward for the last learned action.
        if action != -1:
            reward = getReward(status)
            SA.update(st, action, reward, discFac)
            SA.endEpisode()

        # Quit if the server goes down
        if status == SERVER_DOWN:
            hfo.act(QUIT)
            break
example/simulate_python_sarsa_agents.sh
0 → 100755
View file @
08e20ef6
#! /bin/sh
# This script calls the python implementation of the high_level_sarsa_agent.
# In essence it calls the relevant functions from a thin python wrapper
# written over the C++ sarsa_libraries.
#
# HOW TO RUN
#   takes in the number of trials as first argument
#   takes in the number of offense agents as second argument
#   takes in the number of defense agents as the third argument
# eg. if one needs to run 200 episodes of 2v2 then execute
#   ./simulate_python_sarsa_agents.sh 200 2 2
# Any argument that is left out keeps the default below.
#
# The script is written for POSIX sh (no `+=`, no `&>`), matching its shebang.

port=6000
trials=${1:-10000}
oa=${2:-2}   #number of offense agents
da=${3:-1}   #number of defense agents

#kill any other simulations that may be running
killall -9 rcssserver
sleep 2

# cd to HFO directory; stop rather than delete/launch from the wrong place
cd .. || exit 1
rm -f weights*   #remove weights from old runs (-f: no error on a first run)

python="/usr/bin/python3"   #which python?
agent_path="./example/sarsa_offense"
log_dir="log"
output_path=$agent_path
agent_filename="high_level_sarsa_agent.py"

#start the server
stdbuf -oL ./bin/HFO --port=$port --no-logging --offense-agents=$oa \
    --defense-npcs=$da --trials=$trials --defense-team=base --headless \
    --fullstate > "$log_dir/${oa}v${da}_sarsa_py_agents.log" &

#each agent is a separate process
for n in $(seq 1 "$oa")
do
    sleep 5
    fname="agent${n}.txt"
    logfile="$log_dir/$fname"
    rm -f "$logfile"
    # stdout and stderr both go to the agent's log file
    "$python" "$agent_path/$agent_filename" --port=$port \
        --numTeammates=$((oa - 1)) --numOpponents=$da \
        --numEpisodes=$trials > "$logfile" 2>&1 &
done

# The magic line
#   $$ holds the PID for this script
#   Negation means kill by process group id instead of PID
trap "kill -TERM -$$" INT
wait
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment