Comment the code

cc717963 · Marcus Gandal · c723cb51 · cc717963 · cc717963 · cc717963
Commit cc717963 authored 2 years ago by Marcus Gandal
--- a/src/agent.py
+++ b/src/agent.py
@@ -40,6 +40,7 @@ class Memory():


    def add(self, transition):
+        """ Add transition to memory buffer; if full, replace the oldest transition """
        self.memory[self.curr_index % self.max_capacity] = transition
        self.curr_index = self.curr_index + 1
        self.size = min(self.curr_index, self.max_capacity)
@@ -215,6 +216,7 @@ class Agent():


    def get_velocity_state(self):
+        """ Get and return the linear and angular velocities of the robot """
        odom = rospy.wait_for_message("/odometry/filtered", Odometry, timeout=5)
        linear = odom.twist.twist.linear.x
        angular = odom.twist.twist.angular.z
@@ -223,7 +225,7 @@ class Agent():

    def get_goal_state(self):
        """ Get goal state 
-            Currently assumes flat ground (2D) """
+            Assumes flat ground (2D) """
        position, orientation = self.get_position()
        diff_x = self.goal_x - position.x
        diff_y = self.goal_y - position.y
@@ -275,20 +277,20 @@ class Agent():
            self.has_arrived = True
            print("GOAL REACHED!!!")

-        # Perform some scaling
+        # Normalize values and set state
        self.state = np.concatenate((laser_scan / self.max_distance, velocity, goal_state / np.array([self.max_goal_dist, np.pi])))


-    # TODO: Add OU noise and clip to velocity intervals
    def choose_action(self, state):
+        """ Pick an action """

+        # Picks an action from the actor; use as-is during evaluation
        tf_action = self.network.actor(tf.expand_dims(tf.convert_to_tensor(state), 0))
        action = tf.squeeze(tf_action).numpy()

-        #action[0] = (action[0] + np.random.uniform(0.0, 1.0)) * 0.25
-        #action[1] = (action[1] + np.random.uniform(-0.5, 0.5)) * 0.335
-
-        #action = np.array([np.random.uniform(0.0, 0.25), np.random.uniform(-0.5, 0.5)])
+        # Adds random noise for exploration; uncomment during training
+        #action[0] = (action[0] + np.random.uniform(0.0, 1.0))
+        #action[1] = (action[1] + np.random.uniform(-0.5, 0.5))

        return action * np.array([0.35, 0.35])

@@ -309,6 +311,8 @@ class Agent():

    
    def train_step(self):
+        """ One update iteration of the networks """
+
        batch = self.memory.sample()
        states = batch[:,:15]
        actions = batch[:,15:17]
@@ -347,7 +351,7 @@ class Agent():
        done = self.has_arrived or self.has_crashed
        self.store_transition(state, action, reward, copy.deepcopy(self.state), done)

-        # Update Networks
+        # Update Networks, comment out during evaluation
        #self.train_step()

        return reward, done
--- a/src/networks.py
+++ b/src/networks.py
@@ -64,6 +64,7 @@ class ActorCritic():
        return model


+    # Update critic and actor networks 
    # Source: https://keras.io/examples/rl/ddpg_pendulum/
    def update_networks(self, states, actions, rewards, new_states, dones):

@@ -94,6 +95,8 @@ class ActorCritic():
        self.actor_optimizer.apply_gradients(zip(actor_grad, self.actor.trainable_variables))


+    # Update target networks
+    # Source: https://github.com/philtabor/Youtube-Code-Repository/tree/master/ReinforcementLearning/PolicyGradient/DDPG/tensorflow2/pendulum
    def update_target_networks(self):
        """ Updates target actor and critic """

@@ -124,35 +127,3 @@ class ActorCritic():
        self.target_actor.load_weights(TARGET_ACTOR_FILE_PATH)
        self.target_critic.load_weights(TARGET_CRITIC_FILE_PATH)
        print("Weights loaded.")
-
-
-if __name__ == "__main__":
-
-    # Test code
-    ac = ActorCritic(12, 0.005, 1)
-
-    # Load weights
-    ac.load_weights_from_file()
-
-    # Toy state
-    states = np.array([[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0],
-                      [12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0],
-                      [24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0]])
-
-    actions = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]])
-
-    # Forward propagate actor and critic networks
-    actions = ac.actor(states)
-    y = ac.critic([states, actions])
-
-    # Test target updates
-    ac.update_target_networks()
-
-    # Save weights
-    ac.save_weights_to_file()
-
-    print(y)
-    print("PASS")
-
-
-
--- a/src/run.py
+++ b/src/run.py
@@ -12,7 +12,7 @@ episode_rewards = []

 """

-# Pre training
+# Pre-training; comment out when finished
 for i in range(1000):
    agent.train_step()
    print("Step: {}".format(i))
@@ -21,6 +21,7 @@ agent.save_weights()

 """

+# Main program
 for episode in range(1, NUM_EPISODES + 1):

    episode_reward = 0.0
@@ -36,12 +37,15 @@ for episode in range(1, NUM_EPISODES + 1):
            break

    episode_rewards.append(episode_reward)
+
+    # Moving average of past 40 episodes
    avg_reward = np.mean(episode_rewards[-min(40, episode):])
    print("End of episode:", episode)
    print("Average reward:", avg_reward)
    print()

    """
+    # Comment out during evaluation
    if episode % 3 == 0:
        agent.memory.save_memory_to_file()
        agent.save_weights()