diff --git a/src/agent.py b/src/agent.py
index 804733518c99544de59836fea65b8c9853db4d82..fa21642664b8d5765847251683b7d19a67cd2a24 100644
--- a/src/agent.py
+++ b/src/agent.py
@@ -40,6 +40,7 @@ class Memory():
 
 
     def add(self, transition):
+        """ Add transition to memory buffer, if full, replace oldest transition """
         self.memory[self.curr_index % self.max_capacity] = transition
         self.curr_index = self.curr_index + 1
         self.size = min(self.curr_index, self.max_capacity)
@@ -215,6 +216,7 @@ class Agent():
 
 
     def get_velocity_state(self):
+        """ Get linear and angular velocies of the robot and return """
         odom = rospy.wait_for_message("/odometry/filtered", Odometry, timeout=5)
         linear = odom.twist.twist.linear.x
         angular = odom.twist.twist.angular.z
@@ -223,7 +225,7 @@ class Agent():
 
     def get_goal_state(self):
         """ Get goal state 
-            Currently assumes flat ground (2D) """
+            Assumes flat ground (2D) """
         position, orientation = self.get_position()
         diff_x = self.goal_x - position.x
         diff_y = self.goal_y - position.y
@@ -275,20 +277,20 @@ class Agent():
             self.has_arrived = True
             print("GOAL REACHED!!!")
 
-        # Perform some scaling
+        # Normalize values and set state
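+        # State vector: laser ranges scaled by max_distance, (linear, angular) velocity, and goal terms scaled by max_goal_dist and pi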
         self.state = np.concatenate((laser_scan / self.max_distance, velocity, goal_state / np.array([self.max_goal_dist, np.pi])))
 
 
-    # TODO: Add OU noise and clip to velocity intervals
     def choose_action(self, state):
+        """ Pick an action """
 
+        # Query the actor for an action; during evaluation it is used as-is (no noise)
         tf_action = self.network.actor(tf.expand_dims(tf.convert_to_tensor(state), 0))
         action = tf.squeeze(tf_action).numpy()
 
-        #action[0] = (action[0] + np.random.uniform(0.0, 1.0)) * 0.25
-        #action[1] = (action[1] + np.random.uniform(-0.5, 0.5)) * 0.335
-
-        #action = np.array([np.random.uniform(0.0, 0.25), np.random.uniform(-0.5, 0.5)])
+        # Add random noise for exploration; uncomment during training
+        #action[0] = (action[0] + np.random.uniform(0.0, 1.0))
+        #action[1] = (action[1] + np.random.uniform(-0.5, 0.5))
 
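+        # Scale both action components by 0.35 to keep them within the desired velocity intervals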
         return action * np.array([0.35, 0.35])
 
@@ -309,6 +311,8 @@ class Agent():
 
     
     def train_step(self):
+        """ One update iteration of the networks """
+
         batch = self.memory.sample()
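+        # Each sampled row is a flattened transition: 15 state values, 2 action values, then reward, next state and done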
         states = batch[:,:15]
         actions = batch[:,15:17]
@@ -347,7 +351,7 @@ class Agent():
         done = self.has_arrived or self.has_crashed
         self.store_transition(state, action, reward, copy.deepcopy(self.state), done)
 
-        # Update Networks
+        # Update networks; uncomment during training, comment out during evaluation
         #self.train_step()
 
         return reward, done
diff --git a/src/networks.py b/src/networks.py
index 060f49287a71fece9097144b414bc925aa143f02..afb42879a59eee2fd4980cdcc389e2c15f0518f2 100644
--- a/src/networks.py
+++ b/src/networks.py
@@ -64,6 +64,7 @@ class ActorCritic():
         return model
 
 
+    # Update critic and actor networks 
     # Source: https://keras.io/examples/rl/ddpg_pendulum/
     def update_networks(self, states, actions, rewards, new_states, dones):
 
@@ -94,6 +95,8 @@ class ActorCritic():
         self.actor_optimizer.apply_gradients(zip(actor_grad, self.actor.trainable_variables))
 
 
+    # Update target networks
+    # Source: https://github.com/philtabor/Youtube-Code-Repository/tree/master/ReinforcementLearning/PolicyGradient/DDPG/tensorflow2/pendulum
     def update_target_networks(self):
         """ Updates target actor and critic """
 
@@ -124,35 +127,3 @@ class ActorCritic():
         self.target_actor.load_weights(TARGET_ACTOR_FILE_PATH)
         self.target_critic.load_weights(TARGET_CRITIC_FILE_PATH)
         print("Weights loaded.")
-
-
-if __name__ == "__main__":
-
-    # Test code
-    ac = ActorCritic(12, 0.005, 1)
-
-    # Load weights
-    ac.load_weights_from_file()
-
-    # Toy state
-    states = np.array([[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0],
-                      [12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0],
-                      [24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0]])
-
-    actions = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]])
-
-    # Forward propagate actor and critic networks
-    actions = ac.actor(states)
-    y = ac.critic([states, actions])
-
-    # Test target updates
-    ac.update_target_networks()
-
-    # Save weights
-    ac.save_weights_to_file()
-
-    print(y)
-    print("PASS")
-
-
-
diff --git a/src/run.py b/src/run.py
index 8aef7892622b2043e7754dd3529ca78257254421..0f9c9993c06a06f9838056a6a78159b8bdeac99b 100644
--- a/src/run.py
+++ b/src/run.py
@@ -12,7 +12,7 @@ episode_rewards = []
 
 """
 
-# Pre training
+# Pre-training; comment out when finished
 for i in range(1000):
     agent.train_step()
     print("Step: {}".format(i))
@@ -21,6 +21,7 @@ agent.save_weights()
 
 """
 
+# Main episode loop
 for episode in range(1, NUM_EPISODES + 1):
 
     episode_reward = 0.0
@@ -36,12 +37,15 @@ for episode in range(1, NUM_EPISODES + 1):
             break
 
     episode_rewards.append(episode_reward)
+
+    # Moving average of the reward over the past 40 episodes
     avg_reward = np.mean(episode_rewards[-min(40, episode):])
     print("End of episode:", episode)
     print("Average reward:", avg_reward)
     print()
 
     """
+    # Comment out during evaluation
     if episode % 3 == 0:
         agent.memory.save_memory_to_file()
         agent.save_weights()