Commit cc717963 authored by Marcus Gandal

Comment the code

parent c723cb51
@@ -40,6 +40,7 @@ class Memory():
def add(self, transition):
""" Add transition to memory buffer, if full, replace oldest transition """
self.memory[self.curr_index % self.max_capacity] = transition
self.curr_index = self.curr_index + 1
self.size = min(self.curr_index, self.max_capacity)
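For reference, a minimal self-contained sketch of how this circular buffer behaves once curr_index wraps past max_capacity; the constructor and the sample() method are assumptions, since they are not part of this diff:

import numpy as np

class Memory():
    """ Fixed-capacity circular replay buffer (sketch, not part of this commit) """
    def __init__(self, max_capacity, batch_size=64):   # batch_size is an assumed parameter
        self.max_capacity = max_capacity
        self.batch_size = batch_size
        self.memory = [None] * max_capacity
        self.curr_index = 0
        self.size = 0

    def add(self, transition):
        """ Add a transition to the buffer; once full, overwrite the oldest entry """
        self.memory[self.curr_index % self.max_capacity] = transition
        self.curr_index = self.curr_index + 1
        self.size = min(self.curr_index, self.max_capacity)

    def sample(self):
        """ Uniformly sample a batch of stored transitions (assumed behaviour) """
        indices = np.random.choice(self.size, self.batch_size)
        return np.array([self.memory[i] for i in indices])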
@@ -215,6 +216,7 @@ class Agent():
def get_velocity_state(self):
""" Get linear and angular velocies of the robot and return """
odom = rospy.wait_for_message("/odometry/filtered", Odometry, timeout=5)
linear = odom.twist.twist.linear.x
angular = odom.twist.twist.angular.z
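The two twist components are presumably packed into a small velocity vector and returned; a hedged sketch of the full method (the return shape is an assumption):

import numpy as np
import rospy
from nav_msgs.msg import Odometry

def get_velocity_state(self):
    """ Get the robot's linear and angular velocities and return them """
    odom = rospy.wait_for_message("/odometry/filtered", Odometry, timeout=5)
    linear = odom.twist.twist.linear.x    # forward velocity (m/s)
    angular = odom.twist.twist.angular.z  # yaw rate (rad/s)
    return np.array([linear, angular])    # assumed 2-element velocity state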
@@ -223,7 +225,7 @@ class Agent():
def get_goal_state(self):
""" Get goal state
Currently assumes flat ground (2D) """
Assumes flat ground (2D) """
position, orientation = self.get_position()
diff_x = self.goal_x - position.x
diff_y = self.goal_y - position.y
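The normalization further down divides the goal state by [max_goal_dist, pi], which suggests a (distance, heading error) pair; a sketch of how the method could continue under that assumption (get_yaw is a hypothetical helper for extracting yaw from the orientation quaternion):

import numpy as np

def get_goal_state(self):
    """ Get goal state. Assumes flat ground (2D) """
    position, orientation = self.get_position()
    diff_x = self.goal_x - position.x
    diff_y = self.goal_y - position.y
    distance = np.hypot(diff_x, diff_y)               # Euclidean distance to the goal
    yaw = self.get_yaw(orientation)                   # hypothetical helper: quaternion -> yaw
    heading_error = np.arctan2(diff_y, diff_x) - yaw  # angle from the robot heading to the goal
    heading_error = np.arctan2(np.sin(heading_error), np.cos(heading_error))  # wrap to [-pi, pi]
    return np.array([distance, heading_error])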
@@ -275,20 +277,20 @@ class Agent():
self.has_arrived = True
print("GOAL REACHED!!!")
# Perform some scaling
# Normalize values and set state
self.state = np.concatenate((laser_scan / self.max_distance, velocity, goal_state / np.array([self.max_goal_dist, np.pi])))
# TODO: Add OU noise and clip to velocity intervals
def choose_action(self, state):
""" Pick an action """
# Pick the action from the actor; use during evaluation
tf_action = self.network.actor(tf.expand_dims(tf.convert_to_tensor(state), 0))
action = tf.squeeze(tf_action).numpy()
#action[0] = (action[0] + np.random.uniform(0.0, 1.0)) * 0.25
#action[1] = (action[1] + np.random.uniform(-0.5, 0.5)) * 0.335
#action = np.array([np.random.uniform(0.0, 0.25), np.random.uniform(-0.5, 0.5)])
# Add random noise for exploration; use during training
#action[0] = (action[0] + np.random.uniform(0.0, 1.0))
#action[1] = (action[1] + np.random.uniform(-0.5, 0.5))
return action * np.array([0.35, 0.35])
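The TODO above mentions Ornstein-Uhlenbeck noise; a minimal sketch of an OU process that could replace the commented-out uniform noise during training (theta, sigma, dt and the clipping bounds are assumptions):

import numpy as np

class OUNoise():
    """ Ornstein-Uhlenbeck process: temporally correlated exploration noise (sketch) """
    def __init__(self, size, mu=0.0, theta=0.15, sigma=0.2, dt=1e-2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.dt = dt
        self.x = np.copy(self.mu)

    def __call__(self):
        # dx = theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)
        self.x += self.theta * (self.mu - self.x) * self.dt \
                  + self.sigma * np.sqrt(self.dt) * np.random.normal(size=self.mu.shape)
        return self.x

# Usage sketch inside choose_action during training (velocity intervals are assumptions):
# action = np.clip(action + noise(), [0.0, -0.5], [1.0, 0.5])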
@@ -309,6 +311,8 @@ class Agent():
def train_step(self):
""" One update iteration of the networks """
batch = self.memory.sample()
states = batch[:,:15]
actions = batch[:,15:17]
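The first 15 columns are the state and the next two the action; the remaining columns presumably hold the reward, the next state and the done flag. A hedged sketch of how the rest of the batch might be unpacked (the column layout beyond index 17 is an assumption):

# Each row of batch: [state (15), action (2), reward (1), new_state (15), done (1)] -- assumed layout
states     = batch[:, :15]
actions    = batch[:, 15:17]
rewards    = batch[:, 17]       # assumed column
new_states = batch[:, 18:33]    # assumed columns
dones      = batch[:, 33]       # assumed column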
@@ -347,7 +351,7 @@ class Agent():
done = self.has_arrived or self.has_crashed
self.store_transition(state, action, reward, copy.deepcopy(self.state), done)
# Update Networks
# Update Networks, comment out during evaluation
#self.train_step()
return reward, done
@@ -64,6 +64,7 @@ class ActorCritic():
return model
# Update critic and actor networks
# Source: https://keras.io/examples/rl/ddpg_pendulum/
def update_networks(self, states, actions, rewards, new_states, dones):
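The linked Keras DDPG example computes the critic target from the target networks and updates both networks with gradient tapes; a condensed sketch of that pattern, assuming a discount factor self.gamma and optimizers created elsewhere in the class:

import tensorflow as tf

def update_networks(self, states, actions, rewards, new_states, dones):
    """ One DDPG update of the critic and actor (sketch following the cited example) """
    with tf.GradientTape() as tape:
        target_actions = self.target_actor(new_states, training=True)
        # Bellman target, masking the bootstrap term on terminal transitions
        y = rewards + self.gamma * (1.0 - dones) * tf.squeeze(
            self.target_critic([new_states, target_actions], training=True))
        critic_value = tf.squeeze(self.critic([states, actions], training=True))
        critic_loss = tf.math.reduce_mean(tf.math.square(y - critic_value))
    critic_grad = tape.gradient(critic_loss, self.critic.trainable_variables)
    self.critic_optimizer.apply_gradients(zip(critic_grad, self.critic.trainable_variables))

    with tf.GradientTape() as tape:
        new_actions = self.actor(states, training=True)
        # Maximize Q(s, actor(s)) by minimizing its negative
        actor_loss = -tf.math.reduce_mean(self.critic([states, new_actions], training=True))
    actor_grad = tape.gradient(actor_loss, self.actor.trainable_variables)
    self.actor_optimizer.apply_gradients(zip(actor_grad, self.actor.trainable_variables))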
@@ -94,6 +95,8 @@ class ActorCritic():
self.actor_optimizer.apply_gradients(zip(actor_grad, self.actor.trainable_variables))
# Update target networks
# Source: https://github.com/philtabor/Youtube-Code-Repository/tree/master/ReinforcementLearning/PolicyGradient/DDPG/tensorflow2/pendulum
def update_target_networks(self):
""" Updates target actor and critic """
@@ -124,35 +127,3 @@ class ActorCritic():
self.target_actor.load_weights(TARGET_ACTOR_FILE_PATH)
self.target_critic.load_weights(TARGET_CRITIC_FILE_PATH)
print("Weights loaded.")
if __name__ == "__main__":
# Test code
ac = ActorCritic(12, 0.005, 1)
# Load weights
ac.load_weights_from_file()
# Toy state
states = np.array([[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0],
[12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0],
[24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0]])
actions = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]])
# Forward propagate actor and critic networks
actions = ac.actor(states)
y = ac.critic([states, actions])
# Test target updates
ac.update_target_networks()
# Save weights
ac.save_weights_to_file()
print(y)
print("PASS")
@@ -12,7 +12,7 @@ episode_rewards = []
"""
# Pre training
# Pre-training, comment out when finished
for i in range(1000):
agent.train_step()
print("Step: {}".format(i))
@@ -21,6 +21,7 @@ agent.save_weights()
"""
# Main program
for episode in range(1, NUM_EPISODES + 1):
episode_reward = 0.0
@@ -36,12 +37,15 @@ for episode in range(1, NUM_EPISODES + 1):
break
episode_rewards.append(episode_reward)
# Moving average of past 40 episodes
avg_reward = np.mean(episode_rewards[-min(40, episode):])
print("End of episode:", episode)
print("Average reward:", avg_reward)
print()
"""
# Comment out during evaluation
if episode % 3 == 0:
agent.memory.save_memory_to_file()
agent.save_weights()