From 6fb04eef3c391c036579251dd63725eaac476bc7 Mon Sep 17 00:00:00 2001 From: nambh713 <nambh713@su02-108.ad.liu.se> Date: Thu, 17 Oct 2019 09:33:11 +0200 Subject: [PATCH] added comments again --- QLearning/bin/StateAndReward.class | Bin 2878 -> 2878 bytes QLearning/src/StateAndReward.java | 10 +++++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/QLearning/bin/StateAndReward.class b/QLearning/bin/StateAndReward.class index ce9df9a9c261d08fdc1ee95fc1cfcf196a454b97..bd18e9881548d9eb85ea11a342fe3672d576af21 100644 GIT binary patch delta 282 zcmdldwohzB4=ba<<X+aPk`fHe43Z2246+Oc3<?aU42ldE3@Qw^44RXz*`7!mGO#ch zF^Di2Gbl5dF_<t|Ft{*SG88jdZMI;KWvq8#U}kV+;9+oP5M^*-kYR9RP-1XrFk*0H zux0ROaAoje@MZ922w?DINMP`1$Y2Oy$Yuy)C}s#_sAdRfXl96DXk&<En8pyrFpnXc zVIe~-!%BukhK&qK3_BT;8TK%wG8|&aWH`l;&2Wh!hv6zi9>ZORLI!RI28JL84hDwF zBAnM$n;BRcS{T?F+8KBmIvIo+x)?+mdKhFFdKnZLCNQWnOk~hum^_)EYpv`e1~!Hz h3_J`=8Tc5MGl(**V31^3%^=6HhC!8K-Q=5GsQ{}iHR=EW delta 282 zcmdldwohzB4=W@8<X+aPlHv@^3=#|i3^EJ`4Dt-73<?Yu49X0)3>uTI*`7!mFt9Kf zGKer3F(@+_Gng=#Ft{+7G88kIZMI;KWvsVjU}msq;9+oJ5M^*=kYR9QP-1XqFk*0G zuw`&(aAojd@MZ912w?DHNMP`0$YAhc$Y$_kC}s#^sAdReXl4juXk!Rvn8pysFpnXe zVIe~#!%Bu&hK&qy3_BU(8TK$FG8|$^WjMu<&Txq#gW)Pe7Q<bJTn26i28JL84hDwF zBAnM$>ls)X8W`9ani+T*S{Z~H+89I`Iv8XaIvErgdKgq0dKq*W`X}>qt(Bd}z{aqE hfrnus10Ta;22qA343Z4X8RQsNFsL%DntYQh6#z#ZH5LE> diff --git a/QLearning/src/StateAndReward.java b/QLearning/src/StateAndReward.java index 035c544..1986fa6 100644 --- a/QLearning/src/StateAndReward.java +++ b/QLearning/src/StateAndReward.java @@ -12,6 +12,7 @@ public class StateAndReward { //State 3 is veering right //State 1,2 is quite good as it is within Pi/8 angle from normal //state = String.valueOf(angle); + //concate _S to make the state more readable in terminal return state.concat("_S"); } @@ -24,7 +25,7 @@ public class StateAndReward { int state = discretize(angle, 4, -3.14/8,3.14/8); //if state is 1 or 2, reward with positives - //else -5 + //else penalize switch(state) { case 1: case 2: @@ -48,7 +49,10 @@ public class StateAndReward { String state = "OneStateToRuleThemAll2"; String angle_state = String.valueOf(discretize(angle, 5, -3.14/8,3.14/8)); String vy_state = String.valueOf(discretize(vy, 5, -1,1)) ; + //add extra emphasis on value of vy being very close to 0 + //but do not penalize if it cannot reach such value String vy_state_acc = String.valueOf(discretize(vy, 3, -0.1,0.1)); + //small emphasis on vx because having the angle be upright already compensate for vx String vx_state = String.valueOf(discretize(vx, 3, -0.5,0.5)); state = angle_state +"_"+ vy_state +"_" + vy_state_acc + "_"+ vx_state; return state; @@ -57,12 +61,12 @@ public class StateAndReward { /* Reward function for the full hover controller */ public static double getRewardHover(double angle, double vx, double vy) { - /* TODO: IMPLEMENT THIS FUNCTION */ - double reward = 0; String state = getStateHover(angle, vx, vy); + //Separate state to its component String[] state_str = state.split("_"); int [] state_int = new int[state_str.length]; + //Cast the strings to int for each comparison for (int i = 0; i< state_int.length; i++){ state_int[i] = Integer.valueOf(state_str[i]); } -- GitLab