diff --git a/QLearning/bin/StateAndReward.class b/QLearning/bin/StateAndReward.class index ce9df9a9c261d08fdc1ee95fc1cfcf196a454b97..bd18e9881548d9eb85ea11a342fe3672d576af21 100644 Binary files a/QLearning/bin/StateAndReward.class and b/QLearning/bin/StateAndReward.class differ diff --git a/QLearning/src/StateAndReward.java b/QLearning/src/StateAndReward.java index 035c544951d968de300955c9835efcbeb73cc006..1986fa6f54fc1dbcccc0b287c578f40b5d3910eb 100644 --- a/QLearning/src/StateAndReward.java +++ b/QLearning/src/StateAndReward.java @@ -12,6 +12,7 @@ public class StateAndReward { //State 3 is veering right //State 1,2 is quite good as it is within Pi/8 angle from normal //state = String.valueOf(angle); + //concate _S to make the state more readable in terminal return state.concat("_S"); } @@ -24,7 +25,7 @@ public class StateAndReward { int state = discretize(angle, 4, -3.14/8,3.14/8); //if state is 1 or 2, reward with positives - //else -5 + //else penalize switch(state) { case 1: case 2: @@ -48,7 +49,10 @@ public class StateAndReward { String state = "OneStateToRuleThemAll2"; String angle_state = String.valueOf(discretize(angle, 5, -3.14/8,3.14/8)); String vy_state = String.valueOf(discretize(vy, 5, -1,1)) ; + //add extra emphasis on value of vy being very close to 0 + //but do not penalize if it cannot reach such value String vy_state_acc = String.valueOf(discretize(vy, 3, -0.1,0.1)); + //small emphasis on vx because having the angle be upright already compensate for vx String vx_state = String.valueOf(discretize(vx, 3, -0.5,0.5)); state = angle_state +"_"+ vy_state +"_" + vy_state_acc + "_"+ vx_state; return state; @@ -57,12 +61,12 @@ public class StateAndReward { /* Reward function for the full hover controller */ public static double getRewardHover(double angle, double vx, double vy) { - /* TODO: IMPLEMENT THIS FUNCTION */ - double reward = 0; String state = getStateHover(angle, vx, vy); + //Separate state to its component String[] state_str = state.split("_"); 
int [] state_int = new int[state_str.length]; + //Parse each component string into an int for comparison for (int i = 0; i< state_int.length; i++){ state_int[i] = Integer.valueOf(state_str[i]); }