Commit a6b982c3 authored by Shashank Suhas

First commit

parent b8b2a1d4
#!/bin/bash
for i in $(seq 10)
do
./bin/HFO --offense-agents=3 --defense-npcs=2 --defense-team=helios --offense-on-ball 5 --headless > "logfile_$i" &
P1=$!
sleep 5
# Background the whole subshell (not a command inside it) so $! is its PID,
# and leave the glob unquoted so rm actually matches the weight files.
(cd ./example/sarsa_offense/ && rm -f weight* && ./high_level_sarsa_agent --numAgents 3 --numEpisodes 20000) &
P2=$!
wait $P1 $P2
echo "COMPLETED"
done
@@ -18,6 +18,9 @@ SERVER_BIN = 'rcssserver'
# Command to run the monitor. Edit as needed; ditto to the above re directories.
MONITOR_BIN = 'soccerwindow2'
# Success rate of the agents, tracked over time
success_rate_vs_time = []
def cleanup():
"""Cleanup even if doing SystemExit, as with term."""
for p in reversed(processes):
@@ -40,6 +43,7 @@ def launch(cmd, name = 'Unknown', necessary = True, suppressOutput = True):
if suppressOutput:
kwargs = {'stdout': open(os.devnull, 'w'),
'stderr': open(os.devnull, 'w')}
print(cmd)
try:
p = subprocess.Popen(cmd.split(' '), shell = False, **kwargs)
except (IOError, OSError):
......
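A side note on the launch() helper above: cmd.split(' ') breaks any command whose arguments contain spaces or shell quoting. A minimal standard-library sketch of a more robust variant (the signature simply mirrors the helper above; treat it as illustrative, not as part of this commit):

import os
import shlex
import subprocess

def launch(cmd, name='Unknown', necessary=True, suppressOutput=True):
    """Launch cmd as a child process, honoring shell-style quoting."""
    kwargs = {}
    if suppressOutput:
        kwargs = {'stdout': open(os.devnull, 'w'),
                  'stderr': open(os.devnull, 'w')}
    # shlex.split keeps quoted arguments intact, e.g. --team "base left"
    return subprocess.Popen(shlex.split(cmd), shell=False, **kwargs)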
@@ -3,6 +3,8 @@
import sys, numpy, time, os, subprocess, Teams
from Communicator import ClientCommunicator, TimeoutError
import matplotlib.pyplot as plt
class DoneError(Exception):
""" This exception is thrown when the Trainer is finished. """
@@ -51,6 +53,13 @@ class Trainer(object):
self._connectedPlayers = [] # List of connected players
self.initMsgHandlers()
self.fd = open('progress_data', 'w') # File to hold the game lengths, and types of game endings
def __del__(self):
self.fd.close()
def launch_agent(self, agent_num, agent_ext_num, play_offense, port, wait_until_join=True):
"""Launches a learning agent using the agent binary
@@ -166,22 +175,33 @@ class Trainer(object):
self._numGoals += 1
self._numGoalFrames += self._frame - self._lastTrialStart
endOfTrial = True
game_duration = self._frame - self._lastTrialStart
self.fd.write(str(game_duration) + ',GOAL\n')
elif event == 'OUT_OF_BOUNDS':
self._numBallsOOB += 1
endOfTrial = True
game_duration = self._frame - self._lastTrialStart
self.fd.write(str(game_duration) + ',OOB\n')
elif 'CAPTURED_BY_DEFENSE' in event:
self._numBallsCaptured += 1
endOfTrial = True
game_duration = self._frame - self._lastTrialStart
self.fd.write(str(game_duration) + ',DEF\n')
elif event == 'OUT_OF_TIME':
self._numOutOfTime += 1
endOfTrial = True
game_duration = self._frame - self._lastTrialStart
self.fd.write(str(game_duration) + ',OOT\n')
elif event == 'HFO_FINISHED':
self._done = True
if endOfTrial:
self._numTrials += 1
if self._numTrials % 100 == 0:
print(self._numTrials)
print('EndOfTrial: %d / %d %d %s'%\
(self._numGoals, self._numTrials, self._frame, event))
self._numFrames += self._frame - self._lastTrialStart
# print("############# MATCH LENGTH (IN FRAMES)", self._frame - self._lastTrialStart, ".################")
self._lastTrialStart = self._frame
self.getConnectedPlayers()
......
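The progress_data file written above holds one "<frames>,<label>" line per trial, where the label is GOAL, OOB, DEF, or OOT (sample data appears further down in this commit). A minimal sketch of how the success_rate_vs_time list added to the launcher could be filled from that file; the 100-trial window is an assumption, not something this commit specifies:

def load_success_rate(path='progress_data', window=100):
    """Fraction of trials ending in GOAL, per window of trials."""
    with open(path) as f:
        outcomes = [line.strip().split(',')[1] == 'GOAL'
                    for line in f if line.strip()]
    return [sum(outcomes[i:i + window]) / float(len(outcomes[i:i + window]))
            for i in range(0, len(outcomes), window)]

# e.g.: success_rate_vs_time = load_success_rate()
#       plt.plot(success_rate_vs_time); plt.show()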
#include "CMAC.h"
#define TILINGS_PER_GROUP 32
#include <random>
CMAC::CMAC(int numF, int numA, double r[], double m[], double res[]):FunctionApproximator(numF,numA){
double lb = -1, ub = 1;
std::uniform_real_distribution<double> unif(lb, ub);
std::default_random_engine re;
for(int i = 0; i < numF; i++){
ranges[i] = r[i];
minValues[i] = m[i];
@@ -15,6 +21,7 @@ CMAC::CMAC(int numF, int numA, double r[], double m[], double res[]):FunctionApp
numNonzeroTraces = 0;
for(int i = 0; i < RL_MEMORY_SIZE; i++){
weights[i] = 0;
// weights[i] = unif(re);
traces[i] = 0;
}
@@ -46,6 +53,11 @@ void CMAC::setState(double s[]){
void CMAC::updateWeights(double delta, double alpha){
double tmp = delta * alpha / numTilings;
// std::cout<<"TMP var \t"<<tmp<<std::endl;
// if(numNonzeroTraces > 0)
// std::cout<<"weight update"<<std::endl;
for(int i = 0; i < numNonzeroTraces; i++){
@@ -55,6 +67,8 @@ void CMAC::updateWeights(double delta, double alpha){
}
weights[f] += tmp * traces[f];
// if(weights[f] != 0)
// std::cout<<weights[f]<<std::endl;
}
}
@@ -149,17 +163,24 @@ void CMAC::increaseMinTrace(){
void CMAC::read(char *fileName){
std::fstream file;
file.open(fileName, std::ios::in | std::ios::binary);
file.read((char *) weights, RL_MEMORY_SIZE * sizeof(double));
unsigned long pos = file.tellg();
file.close();
std::cout<<"Not reading weights"<<std::endl;
// std::fstream file;
// file.open(fileName, std::ios::in | std::ios::binary);
// file.read((char *) weights, RL_MEMORY_SIZE * sizeof(double));
// unsigned long pos = file.tellg();
// file.close();
colTab->restore(fileName, pos);
// for(int i=0; i<RL_MEMORY_SIZE; i++)
// std::cout<<weights[i]<<std::endl;
// colTab->restore(fileName, pos);
}
void CMAC::write(char *fileName){
// for(int i=0; i<RL_MEMORY_SIZE; i++)
// std::cout<<weights[i]<<std::endl;
std::fstream file;
file.open(fileName, std::ios::out | std::ios::binary);
file.write((char *) weights, RL_MEMORY_SIZE * sizeof(double));
@@ -177,6 +198,12 @@ void CMAC::reset(){
}
}
void CMAC::copyWeights(CMAC *obj){
for (int i = 0; i < RL_MEMORY_SIZE; i++)
weights[i] = obj->weights[i];
}
void CMAC::loadTiles(){
int tilingsPerGroup = TILINGS_PER_GROUP; /* num tilings per tiling group */
@@ -205,7 +232,8 @@ double CMAC::computeQ(int action){
for(int j = 0; j < numTilings; j++){
q += weights[tiles[action][j]];
}
// if(q!=0)
// std::cout<<"QValue:\t"<<q<<std::endl;
return q;
}
......
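For readers unfamiliar with tile coding: computeQ above sums one weight per active tiling, so with TILINGS_PER_GROUP = 32 every Q-value is the sum of 32 entries of the weight table. A toy Python sketch of the same lookup; the hash-based indexing below stands in for the tiles/collision-table machinery and is purely illustrative:

import numpy as np

RL_MEMORY_SIZE = 2 ** 20  # flat weight table size (illustrative)
NUM_TILINGS = 32          # mirrors TILINGS_PER_GROUP

weights = np.zeros(RL_MEMORY_SIZE)

def active_tiles(state, action, resolution=0.1):
    """One tile index per tiling, each tiling offset slightly."""
    tiles = []
    for t in range(NUM_TILINGS):
        offset = t * resolution / NUM_TILINGS
        coords = tuple(int((s + offset) / resolution) for s in state)
        tiles.append(hash((t, action) + coords) % RL_MEMORY_SIZE)
    return tiles

def compute_q(state, action):
    """Q(s, a) as in CMAC::computeQ: sum the active tiles' weights."""
    return sum(weights[i] for i in active_tiles(state, action))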
@@ -49,6 +49,8 @@ class CMAC: public FunctionApproximator{
CMAC(int numF, int numA, double r[], double m[], double res[]);
void copyWeights(CMAC*);
void setState(double s[]);
void updateWeights(double delta, double alpha);
......
@@ -47,6 +47,7 @@ int PolicyAgent::argmaxQ(double state[]){
}
double PolicyAgent::computeQ(double state[], int action){
std::cout<<"PolicyAgent ComputeQ being called"<<std::endl;
return 0;
}
@@ -2,6 +2,7 @@
//add lambda as parameter to sarsaagent
SarsaAgent::SarsaAgent(int numFeatures, int numActions, double learningRate, double epsilon, double lambda, FunctionApproximator *FA, char *loadWeightsFile, char *saveWeightsFile):PolicyAgent(numFeatures, numActions, learningRate, epsilon, FA, loadWeightsFile, saveWeightsFile){
std::cout<<"Num actions: \t"<<numActions<<std::endl;
this->lambda = lambda;
episodeNumber = 0;
lastAction = -1;
@@ -31,8 +32,11 @@ void SarsaAgent::update(double state[], int action, double reward, double discou
//Sarsa update
double newQ = FA->computeQ(action);
// std::cout<<"newQ \t"<<newQ<<std::endl;
delta += discountFactor * newQ;
// std::cout<<"Delta: \t"<<delta<<std::endl;
FA->updateWeights(delta, learningRate);
// Traces decay by gamma*lambda (zero here, since the agents set lambda = 0)
FA->decayTraces(discountFactor*lambda);
@@ -45,6 +49,10 @@ void SarsaAgent::update(double state[], int action, double reward, double discou
}
}
void SarsaAgent::copyWeights(SarsaAgent *agent){
dynamic_cast<CMAC*>(FA)->copyWeights(dynamic_cast<CMAC*>(agent->FA));
}
void SarsaAgent::endEpisode(){
episodeNumber++;
@@ -66,7 +74,7 @@ void SarsaAgent::endEpisode(){
if(toSaveWeights && (episodeNumber + 1) % 5 == 0){
saveWeights(saveWeightsFile);
std::cout << "Saving weights to " << saveWeightsFile << std::endl;
// std::cout << "Saving weights to " << saveWeightsFile << std::endl;
}
lastAction = -1;
@@ -88,7 +96,8 @@ int SarsaAgent::selectAction(double state[]){
else{
action = argmaxQ(state);
}
// std::cout<<"Action id\t"<<action<<std::endl;
return action;
}
......
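Putting the pieces of SarsaAgent::update together: the rule implemented is SARSA(lambda) with eligibility traces, with the division by the number of tilings handled inside CMAC::updateWeights. In LaTeX:

\delta_t = r_{t+1} + \gamma\, Q(s_{t+1}, a_{t+1}) - Q(s_t, a_t), \qquad
w \leftarrow w + \frac{\alpha\, \delta_t}{n_{\mathrm{tilings}}}\, e, \qquad
e \leftarrow \gamma \lambda\, e

Note that both agent files in this commit set lambda = 0, so the traces vanish after a single step and the update reduces to one-step SARSA.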
@@ -2,7 +2,9 @@
#define SARSA_AGENT
#include "PolicyAgent.h"
#include "FuncApprox.h"
// #include "FuncApprox.h"
#include "CMAC.h"
// #define RL_MEMORY_SIZE 1048576
class SarsaAgent:public PolicyAgent{
@@ -18,6 +20,8 @@ class SarsaAgent:public PolicyAgent{
SarsaAgent(int numFeatures, int numActions, double learningRate, double epsilon, double lambda, FunctionApproximator *FA, char *loadWeightsFile, char *saveWeightsFile);
void copyWeights(SarsaAgent*);
int argmaxQ(double state[]);
double computeQ(double state[], int action);
......
@@ -12,7 +12,8 @@ FA_LIB = funcapprox
POLICY_LIB = policyagent
#Flags
CXXFLAGS = -g -Wall -std=c++11 -pthread
# CXXFLAGS = -g -Wall -std=c++11 -pthread
CXXFLAGS = -O3 -Wall -std=c++11 -pthread
LDFLAGS = -l$(FA_LIB) -l$(POLICY_LIB) -lhfo -pthread
LDLIBS = -L$(FA_DIR) -L$(POLICY_DIR) -L$(HFO_LIB_DIR)
LINKEROPTIONS = -Wl,-rpath,$(HFO_LIB_DIR)
@@ -21,7 +22,9 @@ LINKEROPTIONS = -Wl,-rpath,$(HFO_LIB_DIR)
CXX = g++
#Sources
SRC = high_level_sarsa_agent.cpp
# SRC = high_level_sarsa_agent.cpp
SRC = high_level_sarsa_agent_changed.cpp
#Objects
OBJ = $(SRC:.cpp=.o)
......
@@ -107,9 +107,25 @@ void offenseAgent(int port, int numTMates, int numEpi, double learnR,
"_" + std::to_string(numTMates + 1) +
"_" + std::to_string(suffix);
wtFile = &s[0u];
// std::string filename1 = "early_game_model_" + std::to_string(port) +
// "_" + std::to_string(numTMates + 1) +
// "_" + std::to_string(suffix);
// std::string filename2 = "late_game_model_" + std::to_string(port) +
// "_" + std::to_string(numTMates + 1) +
// "_" + std::to_string(suffix);
// char *str1 = &filename1[0u];
// char *str2 = &filename2[0u];
double lambda = 0;
CMAC *fa = new CMAC(numF, numA, range, min, res);
// CMAC *fa1 = new CMAC(numF, numA, range, min, res);
// CMAC *fa2 = new CMAC(numF, numA, range, min, res);
// CMAC *fa = fa1;
SarsaAgent *sa = new SarsaAgent(numF, numA, learnR, eps, lambda, fa, wtFile, wtFile);
// SarsaAgent *sa1 = new SarsaAgent(numF, numA, learnR, eps, lambda, fa1, str1, str1);
// SarsaAgent *sa2 = new SarsaAgent(numF, numA, learnR, eps, lambda, fa2, str2, str2);
// SarsaAgent *sa = sa1;
hfo::HFOEnvironment hfo;
hfo::status_t status;
@@ -119,18 +135,28 @@ void offenseAgent(int port, int numTMates, int numEpi, double learnR,
double reward;
hfo.connectToServer(hfo::HIGH_LEVEL_FEATURE_SET,"../../bin/teams/base/config/formations-dt",6000,"localhost","base_left",false,"");
for (int episode=0; episode < numEpi; episode++) {
int count = 0;
// int count = 0;
status = hfo::IN_GAME;
action = -1;
// bool model_changed_flag = false;
while (status == hfo::IN_GAME) {
const std::vector<float>& state_vec = hfo.getState();
// If has ball
// sleep(1);
// std::cout<<state_vec[numTMates]<<std::endl;
if(state_vec[5] == 1) {
if(action != -1) {
reward = getReward(status);
sa->update(state, action, reward, discFac);
}
// if(state_vec[numTMates] >= 0.2 && model_changed_flag == false)
// {
// sa = sa2;
// model_changed_flag = true;
// }
// Fill up state array
purgeFeatures(state, state_vec, numTMates, oppPres);
@@ -156,11 +182,16 @@ void offenseAgent(int port, int numTMates, int numEpi, double learnR,
reward = getReward(status);
sa->update(state, action, reward, discFac);
sa->endEpisode();
// sa1->endEpisode();
// sa2->endEpisode();
// sa = sa1;
// model_changed_flag = false;
}
}
delete sa;
delete fa;
delete sa, fa;
// delete sa1, sa2;
// delete fa1, fa2;
}
int main(int argc, char **argv) {
......
#include <iostream>
#include <vector>
#include <HFO.hpp>
#include <cstdlib>
#include <thread>
#include "SarsaAgent.h"
#include "CMAC.h"
#include <unistd.h>
// Before running this program, first start the HFO server:
// $ ./bin/HFO --offense-agents numAgents
void printUsage() {
std::cout<<"Usage: ./high_level_sarsa_agent [Options]"<<std::endl;
std::cout<<"Options:"<<std::endl;
std::cout<<" --numAgents <int> Number of SARSA agents"<<std::endl;
std::cout<<" Default: 1"<<std::endl;
std::cout<<" --numEpisodes <int> Number of episodes to run"<<std::endl;
std::cout<<" Default: 10"<<std::endl;
std::cout<<" --basePort <int> SARSA agent base port"<<std::endl;
std::cout<<" Default: 6000"<<std::endl;
std::cout<<" --learnRate <float> Learning rate of SARSA agents"<<std::endl;
std::cout<<" Range: [0.0, 1.0]"<<std::endl;
std::cout<<" Default: 0.1"<<std::endl;
std::cout<<" --suffix <int> Suffix for weights files"<<std::endl;
std::cout<<" Default: 0"<<std::endl;
std::cout<<" --noOpponent Sets opponent present flag to false"<<std::endl;
std::cout<<" --help Displays this help and exit"<<std::endl;
}
// Returns the reward for SARSA based on the episode status
double getReward(int status) {
double reward;
if (status==hfo::GOAL) reward = 1;
else if (status==hfo::CAPTURED_BY_DEFENSE) reward = -1;
else if (status==hfo::OUT_OF_BOUNDS) reward = -1;
else reward = 0;
return reward;
}
// Fill state with only the required features from state_vec
void purgeFeatures(double *state, const std::vector<float>& state_vec,
int numTMates, bool oppPres) {
int stateIndex = 0;
// With no opponent, the Distance-to-Opponent feature and the
// Distance-from-Teammate-i-to-Opponent features are absent
int tmpIndex = 9 + 3 * numTMates;
for(int i = 0; i < state_vec.size(); i++) {
// Skip the first six features; when no opponent is present, also skip
// proximity-to-opponent (i == 9) and each teammate's proximity-to-opponent;
// skip any features past the teammate block
if(i < 6||(!oppPres && ((i>9+numTMates && i<=9+2*numTMates)||i==9))||i>9+6*numTMates) continue;
// Ignore Angle and Uniform Number of Teammates
int temp = i-tmpIndex;
if(temp > 0 && (temp % 3 == 2 || temp % 3 == 0)) continue;
if (i > 9+6*numTMates) continue;
state[stateIndex] = state_vec[i];
stateIndex++;
}
//std::cout<<stateIndex<<"yo";
}
// Convert int to hfo::Action
hfo::action_t toAction(int action, const std::vector<float>& state_vec) {
hfo::action_t a;
switch(action) {
case 0: a = hfo::SHOOT;
break;
case 1: a = hfo::DRIBBLE;
break;
default:int size = state_vec.size();
a = hfo::PASS;/*,
state_vec[(size - 1) - (action - 2) * 3],
0.0};*/
}
return a;
}
void offenseAgent(int port, int numTMates, int numEpi, double learnR,
int suffix, bool oppPres, double eps) {
// Number of features
int numF = oppPres ? (4 + 4 * numTMates) : (3 + 3 * numTMates);
// Number of actions
int numA = 2 + numTMates;
double discFac = 1;
// Tile coding parameter
double resolution = 0.1;
double range[numF];
double min[numF];
double res[numF];
for(int i = 0; i < numF; i++) {
min[i] = -1;
range[i] = 2;
res[i] = resolution;
}
// Weights file
char *wtFile;
std::string s = "weights_" + std::to_string(port) +
"_" + std::to_string(numTMates + 1) +
"_" + std::to_string(suffix);
wtFile = &s[0u];
std::string filename1 = "early_game_model_" + std::to_string(port) +
"_" + std::to_string(numTMates + 1) +
"_" + std::to_string(suffix);
std::string filename2 = "late_game_model_" + std::to_string(port) +
"_" + std::to_string(numTMates + 1) +
"_" + std::to_string(suffix);
char *str1 = &filename1[0u];
char *str2 = &filename2[0u];
double lambda = 0;
// CMAC fa1(numF, numA, range, min, res);
// CMAC fa2(numF, numA, range, min, res);
CMAC *fa1 = new CMAC(numF, numA, range, min, res);
CMAC *fa2 = new CMAC(numF, numA, range, min, res);
CMAC *fa = fa1;
// SarsaAgent sa1(numF, numA, learnR, eps, lambda, &fa1, filename1.c_str(), filename1.c_str());
// SarsaAgent sa1(numF, numA, learnR, eps, lambda, &fa1, filename2.c_str(), filename2.c_str());
SarsaAgent *sa1 = new SarsaAgent(numF, numA, learnR, eps, lambda, fa1, str1, str1);
SarsaAgent *sa3 = new SarsaAgent(numF, numA, learnR, eps, lambda, fa2, str2, str2);
SarsaAgent *sa = sa1, *sa2 = sa1; // sa2 is re-pointed at sa3 once episode 6000 forks the late-game model
hfo::HFOEnvironment hfo;
hfo::status_t status;
hfo::action_t a;
double state[numF];
int action = -1;
double reward;
// bool second_model_active = true;
// NOTE: connects to the fixed server port 6000; the per-agent 'port' argument only names the weight files here
hfo.connectToServer(hfo::HIGH_LEVEL_FEATURE_SET,"../../bin/teams/base/config/formations-dt",6000,"localhost","base_left",false,"");
for (int episode=0; episode < numEpi; episode++) {
// After 6000 episodes, fork the late-game model from the early-game weights
if(episode==6000)
{
// for(int i=0; i<RL_MEMORY_SIZE; i++)
// sa3->FA->weights[i] = sa1->FA->weights[i];
sa3->copyWeights(sa1);
sa2 = sa3;
}
int count = 0;
status = hfo::IN_GAME;
action = -1;
bool model_changed_flag = false;
int iter_count = -1;
while (status == hfo::IN_GAME) {
iter_count++;
const std::vector<float>& state_vec = hfo.getState();
// If has ball
// sleep(1);
// std::cout<<state_vec[numTMates]<<std::endl;
if(state_vec[5] == 1) {
if(action != -1) {
reward = getReward(status);
sa->update(state, action, reward, discFac);
}
// if(state_vec[numTMates] >= 0.2 && model_changed_flag == false) // superseded by the step-count check below
if(iter_count > 100 && model_changed_flag == false)
{
sa = sa2;
model_changed_flag = true;
}
// Fill up state array
purgeFeatures(state, state_vec, numTMates, oppPres);
// Get raw action
action = sa->selectAction(state);
// Get hfo::Action
a = toAction(action, state_vec);
} else {
a = hfo::MOVE;
}
if (a == hfo::PASS) {
// Pass argument: the chosen teammate's uniform number, read from the tail of the state vector
hfo.act(a,state_vec[(9+6*numTMates) - (action-2)*3]);
//std::cout<<(9+6*numTMates) - (action-2)*3;
} else {
hfo.act(a);
}
status = hfo.step();
}
// End of episode
if(action != -1) {
reward = getReward(status);
sa->update(state, action, reward, discFac);
sa1->endEpisode();
sa2->endEpisode();
sa = sa1;
model_changed_flag = false;
}
}
// The comma operator would only delete the first operand, and sa2 merely
// aliases sa1 or sa3; delete each owned object exactly once.
delete sa1;
delete sa3;
delete fa1;
delete fa2;
}
int main(int argc, char **argv) {
int numAgents = 1;
int numEpisodes = 10;
int basePort = 6000;
double learnR = 0.1;
int suffix = 0;
bool opponentPresent = true;
double eps = 0.01;
for(int i = 1; i < argc; i++) {
std::string param = std::string(argv[i]);
if(param == "--numAgents") {
numAgents = atoi(argv[++i]);
}else if(param == "--numEpisodes") {
numEpisodes = atoi(argv[++i]);
}else if(param == "--basePort") {
basePort = atoi(argv[++i]);
}else if(param == "--learnRate") {
learnR = atof(argv[++i]);
if(learnR < 0 || learnR > 1) {
printUsage();
return 0;
}
}else if(param == "--suffix") {
suffix = atoi(argv[++i]);
}else if(param == "--noOpponent") {
opponentPresent = false;
}else if(param=="--eps"){
eps=atof(argv[++i]);
}else {
printUsage();
return 0;
}
}
int numTeammates = numAgents - 1;
std::thread agentThreads[numAgents];
for (int agent = 0; agent < numAgents; agent++) {
agentThreads[agent] = std::thread(offenseAgent, basePort + agent,
numTeammates, numEpisodes, learnR,
suffix, opponentPresent, eps);
usleep(500000L);
}
for (int agent = 0; agent < numAgents; agent++) {
agentThreads[agent].join();
}
return 0;
}
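The index arithmetic in purgeFeatures above is easy to get wrong, so here is a small Python check that replicates its filter for the opponent-present case and confirms the kept-feature count matches numF = 4 + 4*numTMates from offenseAgent. The assumed state-vector length is a guess; the result only requires it to exceed 9 + 6*numTMates:

def kept_indices(num_tmates):
    """Replicate the purgeFeatures filter (opponent present)."""
    vec_len = 10 + 6 * num_tmates + 3  # assumed; extras are filtered anyway
    tmp_index = 9 + 3 * num_tmates
    kept = []
    for i in range(vec_len):
        if i < 6 or i > 9 + 6 * num_tmates:
            continue  # first six features / opponent features
        temp = i - tmp_index
        if temp > 0 and (temp % 3 == 2 or temp % 3 == 0):
            continue  # teammate angle and uniform number
        kept.append(i)
    return kept

for n in range(1, 4):
    assert len(kept_indices(n)) == 4 + 4 * n  # numF in offenseAgent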
/* CSVSaver Configuration file */
# CSVSaver::version
CSVSaver::version = '15.2.2'
# CSVSaver::save
/* If save is on/true, then the saver will attempt to save the results
to the database. Otherwise it will do nothing. */
CSVSaver::save = false
# CSVSaver::filename
/* The file to save the results to. If this file does not exist it
will be created. If the file does exist, the results will be appended
to the end. */
CSVSaver::filename = 'rcssserver.csv'
/* player Configuration file */
# player::version
player::version = '15.2.2'
# player::player_types
player::player_types = 18
# player::pt_max
player::pt_max = 1
# player::random_seed
player::random_seed = -1
# player::subs_max
player::subs_max = 3
# player::allow_mult_default_type
player::allow_mult_default_type = false
# player::catchable_area_l_stretch_max
player::catchable_area_l_stretch_max = 1.3
# player::catchable_area_l_stretch_min
player::catchable_area_l_stretch_min = 1
# player::dash_power_rate_delta_max
player::dash_power_rate_delta_max = 0
# player::dash_power_rate_delta_min
player::dash_power_rate_delta_min = 0
# player::effort_max_delta_factor
player::effort_max_delta_factor = -0.004
# player::effort_min_delta_factor
player::effort_min_delta_factor = -0.004
# player::extra_stamina_delta_max
player::extra_stamina_delta_max = 50
# player::extra_stamina_delta_min
player::extra_stamina_delta_min = 0
# player::foul_detect_probability_delta_factor
player::foul_detect_probability_delta_factor = 0
# player::inertia_moment_delta_factor
player::inertia_moment_delta_factor = 25
# player::kick_power_rate_delta_max
player::kick_power_rate_delta_max = 0
# player::kick_power_rate_delta_min
player::kick_power_rate_delta_min = 0
# player::kick_rand_delta_factor
player::kick_rand_delta_factor = 1
# player::kickable_margin_delta_max
player::kickable_margin_delta_max = 0.1
# player::kickable_margin_delta_min
player::kickable_margin_delta_min = -0.1
# player::new_dash_power_rate_delta_max
player::new_dash_power_rate_delta_max = 0.0008
# player::new_dash_power_rate_delta_min
player::new_dash_power_rate_delta_min = -0.0012
# player::new_stamina_inc_max_delta_factor
player::new_stamina_inc_max_delta_factor = -6000
# player::player_decay_delta_max
player::player_decay_delta_max = 0.1
# player::player_decay_delta_min
player::player_decay_delta_min = -0.1
# player::player_size_delta_factor
player::player_size_delta_factor = -100
# player::player_speed_max_delta_max
player::player_speed_max_delta_max = 0
# player::player_speed_max_delta_min
player::player_speed_max_delta_min = 0
# player::stamina_inc_max_delta_factor
player::stamina_inc_max_delta_factor = 0
87,DEF
79,DEF
69,GOAL
66,OOB
78,GOAL
98,DEF
206,DEF
43,DEF
87,OOB
129,DEF
45,DEF
62,DEF
28,DEF
101,DEF
168,DEF
283,DEF
50,DEF
158,OOB
64,OOB
217,DEF
83,OOB
96,DEF
84,OOB
94,OOB
66,GOAL
76,DEF
25,DEF
32,DEF
32,DEF
101,DEF
66,GOAL
149,DEF
55,DEF
58,DEF
71,DEF
139,OOB
72,DEF
310,GOAL
93,GOAL
87,OOB
68,DEF
94,DEF
75,DEF
26,DEF
282,DEF
143,DEF
86,OOB
127,GOAL
76,DEF
23,DEF
169,DEF
99,GOAL
233,DEF
82,DEF
40,DEF
11,OOB
38,DEF
39,DEF
145,DEF
70,DEF
5,OOB
97,OOB
480,DEF
69,OOB
150,DEF
5,OOB
62,DEF
66,OOB
531,DEF
92,DEF
66,OOB
610,DEF
183,DEF
253,DEF
78,GOAL
79,GOAL
32,DEF
93,OOB
86,DEF
95,DEF
112,OOB
121,GOAL
62,DEF
105,DEF
5,OOB
60,DEF
112,OOB
42,DEF
89,OOB
99,DEF
119,DEF
62,OOB
97,GOAL
5,OOB
91,DEF
179,OOB
137,DEF
43,DEF
99,DEF
154,OOB
@@ -99,9 +99,9 @@ void FeatureExtractor::addFeature(float val) {
float FeatureExtractor::normalize(float val, float min_val, float max_val) {
if (val < min_val || val > max_val) {
std::cout << "Feature " << featIndx << " Violated Feature Bounds: " << val
<< " Expected min/max: [" << min_val << ", "
<< max_val << "]" << std::endl;
// std::cout << "Feature " << featIndx << " Violated Feature Bounds: " << val
// << " Expected min/max: [" << min_val << ", "
// << max_val << "]" << std::endl;
val = std::min(std::max(val, min_val), max_val);
}
return ((val - min_val) / (max_val - min_val))
......