From 6fb04eef3c391c036579251dd63725eaac476bc7 Mon Sep 17 00:00:00 2001
From: nambh713 <nambh713@su02-108.ad.liu.se>
Date: Thu, 17 Oct 2019 09:33:11 +0200
Subject: [PATCH] added comments again

---
 QLearning/bin/StateAndReward.class | Bin 2878 -> 2878 bytes
 QLearning/src/StateAndReward.java  |  10 +++++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/QLearning/bin/StateAndReward.class b/QLearning/bin/StateAndReward.class
index ce9df9a9c261d08fdc1ee95fc1cfcf196a454b97..bd18e9881548d9eb85ea11a342fe3672d576af21 100644
GIT binary patch
delta 282
zcmdldwohzB4=ba<<X+aPk`fHe43Z2246+Oc3<?aU42ldE3@Qw^44RXz*`7!mGO#ch
zF^Di2Gbl5dF_<t|Ft{*SG88jdZMI;KWvq8#U}kV+;9+oP5M^*-kYR9RP-1XrFk*0H
zux0ROaAoje@MZ922w?DINMP`1$Y2Oy$Yuy)C}s#_sAdRfXl96DXk&<En8pyrFpnXc
zVIe~-!%BukhK&qK3_BT;8TK%wG8|&aWH`l;&2Wh!hv6zi9>ZORLI!RI28JL84hDwF
zBAnM$n;BRcS{T?F+8KBmIvIo+x)?+mdKhFFdKnZLCNQWnOk~hum^_)EYpv`e1~!Hz
h3_J`=8Tc5MGl(**V31^3%^=6HhC!8K-Q=5GsQ{}iHR=EW

delta 282
zcmdldwohzB4=W@8<X+aPlHv@^3=#|i3^EJ`4Dt-73<?Yu49X0)3>uTI*`7!mFt9Kf
zGKer3F(@+_Gng=#Ft{+7G88kIZMI;KWvsVjU}msq;9+oJ5M^*=kYR9QP-1XqFk*0G
zuw`&(aAojd@MZ912w?DHNMP`0$YAhc$Y$_kC}s#^sAdReXl4juXk!Rvn8pysFpnXe
zVIe~#!%Bu&hK&qy3_BU(8TK$FG8|$^WjMu<&Txq#gW)Pe7Q<bJTn26i28JL84hDwF
zBAnM$>ls)X8W`9ani+T*S{Z~H+89I`Iv8XaIvErgdKgq0dKq*W`X}>qt(Bd}z{aqE
hfrnus10Ta;22qA343Z4X8RQsNFsL%DntYQh6#z#ZH5LE>

diff --git a/QLearning/src/StateAndReward.java b/QLearning/src/StateAndReward.java
index 035c544..1986fa6 100644
--- a/QLearning/src/StateAndReward.java
+++ b/QLearning/src/StateAndReward.java
@@ -12,6 +12,7 @@ public class StateAndReward {
 		//State 3 is veering right
 		//State 1,2 is quite good as it is within Pi/8 angle from normal
 		//state = String.valueOf(angle);
+		//concatenate _S to make the state more readable in the terminal
 		return state.concat("_S");
 	}
 
@@ -24,7 +25,7 @@ public class StateAndReward {
 		int state = discretize(angle, 4, -3.14/8,3.14/8);
 
 		//if state is 1 or 2, reward with positives
-		//else -5
+		//else penalize
 		switch(state) {
 		case 1:
 		case 2:
@@ -48,7 +49,10 @@ public class StateAndReward {
 		String state = "OneStateToRuleThemAll2";
 		String angle_state = String.valueOf(discretize(angle, 5, -3.14/8,3.14/8));
 		String vy_state = String.valueOf(discretize(vy, 5, -1,1)) ;
+		//add extra emphasis on the value of vy being very close to 0
+		//but do not penalize if it cannot reach such a value
 		String vy_state_acc = String.valueOf(discretize(vy, 3, -0.1,0.1));
+		//small emphasis on vx because having the angle be upright already compensates for vx
 		String vx_state = String.valueOf(discretize(vx, 3, -0.5,0.5));
 		state = angle_state +"_"+ vy_state +"_" + vy_state_acc + "_"+ vx_state;
 		return state;
@@ -57,12 +61,12 @@ public class StateAndReward {
 	/* Reward function for the full hover controller */
 	public static double getRewardHover(double angle, double vx, double vy) {
 
-		/* TODO: IMPLEMENT THIS FUNCTION */
-
 		double reward = 0;
 		String state = getStateHover(angle, vx, vy);
+		//Split the state string into its components
 		String[] state_str = state.split("_");
 		int [] state_int = new int[state_str.length];
+		//Parse each component string into an int for comparison
 		for (int i = 0; i< state_int.length; i++){
 			state_int[i] = Integer.valueOf(state_str[i]);
 		}
-- 
GitLab