Commit 4978b3ba authored by Dennis Malmgren

Working on it

parent e3fd9686
Branch: mdp_analysis
@@ -184,7 +184,7 @@ def get_config():
     parser.add_argument("--n_training_threads", type=int,
                         default=1, help="Number of torch threads for training")
     parser.add_argument("--n_rollout_threads", type=int,
-                        default=20, help="Number of parallel envs for training rollouts")
+                        default=30, help="Number of parallel envs for training rollouts")
     parser.add_argument("--n_eval_rollout_threads", type=int,
                         default=1, help="Number of parallel envs for evaluating rollouts")
     parser.add_argument("--n_render_rollout_threads", type=int,
@@ -267,7 +267,7 @@ def get_config():
     parser.add_argument("--clip_param", type=float,
                         default=0.2, help='ppo clip parameter (default: 0.2)')
     parser.add_argument("--num_mini_batch", type=int,
-                        default=20, help='number of batches for ppo (default: 1)')
+                        default=30, help='number of batches for ppo (default: 1)')
     parser.add_argument("--entropy_coef", type=float,
                         default=0.01, help='entropy term coefficient (default: 0.01)')
     parser.add_argument("--value_loss_coef", type=float,
......
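Both defaults above move from 20 to 30 in the same commit. In MAPPO-style on-policy trainers the recurrent data generator typically builds each PPO minibatch out of whole rollout threads, so n_rollout_threads should stay divisible by num_mini_batch; bumping the two together preserves that. A minimal, hypothetical sketch of such a split (not this repository's actual sampler):

import numpy as np

def recurrent_minibatch_indices(n_rollout_threads: int, num_mini_batch: int):
    # Each minibatch gets an equal number of whole rollout threads.
    assert n_rollout_threads % num_mini_batch == 0
    per_batch = n_rollout_threads // num_mini_batch
    perm = np.random.permutation(n_rollout_threads)
    return [perm[i * per_batch:(i + 1) * per_batch] for i in range(num_mini_batch)]

# With the new defaults: 30 parallel envs -> 30 minibatches of one thread each.
batches = recurrent_minibatch_indices(30, 30)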
@@ -70,17 +70,17 @@ def make_train_env(all_args):
     # }
     reward_wrapper_config = {
         # values= weights, only non zero weights will count to reward
-        "aircraft_share_detected": 0.01, # green detect red without
-        "share_delay_detected": 0.01, # After 300 timesteps, how many red agents detected
-        "share_radar_actions": 0.01, # Number of times radar used
-        "radar_share_detected":0.01, # green detect red with radar
+        "aircraft_share_detected": 0.0, # green detect red without
+        "share_delay_detected": 0.0, # After 300 timesteps, how many red agents detected
+        "share_radar_actions": 0.0, # Number of times radar used
+        "radar_share_detected":0.0, # green detect red with radar
         "position_error": 0.0001, #
         "position_delay_error": 0.0001, #After 300 timesteps,
-        'enemy_aircraft_share_detected': 0.01,
-        "enemy_share_delay_detected": 0.01,
-        "enemy_share_radar_actions": 0.01,
-        "enemy_radar_share_detected":0.01,
+        'enemy_aircraft_share_detected': 0.0,
+        "enemy_share_delay_detected": 0.0,
+        "enemy_share_radar_actions": 0.0,
+        "enemy_radar_share_detected":0.0,
         "enemy_position_error": 0.0001,
         "enemy_position_delay_error": 0.0001
     }
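Per the comment in the config, only keys with a non-zero weight contribute to the scalarized reward, so with this change only the position-error objectives (weight 0.0001) remain active during training. A hypothetical sketch of how such a weight dict is typically consumed (the wrapper's implementation is not part of this diff):

def weighted_sum(objectives: dict, weights: dict) -> float:
    # Zero-weight objectives are skipped entirely.
    return sum(w * objectives[key] for key, w in weights.items() if w != 0.0)

# Example: under the training config above, only position_error /
# position_delay_error (and their enemy_* counterparts) feed the reward.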
@@ -124,19 +124,19 @@ def make_eval_env(all_args):
     reward_wrapper_config = {
         # values= weights, only non zero weights will count to reward
-        "aircraft_share_detected": 1, # green detect red without
-        "share_delay_detected": 1, # After 300 timesteps, how many red agents detected
-        "share_radar_actions": 1, # Number of times radar used
-        "radar_share_detected":1, # green detect red with radar
-        "position_error": 1, #
-        "position_delay_error": 1, #After 300 timesteps,
+        "aircraft_share_detected": 0.0, # green detect red without
+        "share_delay_detected": 0.0, # After 300 timesteps, how many red agents detected
+        "share_radar_actions": 0.0, # Number of times radar used
+        "radar_share_detected":0.0, # green detect red with radar
+        "position_error": 0.0001, #
+        "position_delay_error": 0.0001, #After 300 timesteps,
-        'enemy_aircraft_share_detected': 1,
-        "enemy_share_delay_detected": 1,
-        "enemy_share_radar_actions": 1,
-        "enemy_radar_share_detected":1,
-        "enemy_position_error": 1,
-        "enemy_position_delay_error": 1
+        'enemy_aircraft_share_detected': 0.0,
+        "enemy_share_delay_detected": 0.0,
+        "enemy_share_radar_actions": 0.0,
+        "enemy_radar_share_detected":0.0,
+        "enemy_position_error": 0.0001,
+        "enemy_position_delay_error": 0.0001
     }
     red_behaviour_name = all_args.red_behaviour
     rand_red_behaviour = all_args.rand_red_behaviour
......
@@ -44,17 +44,25 @@ class ScalarizedRewardWrapper(gym.RewardWrapper):
     def reward(self, reward: MultiAgentMultiObjectiveReward) -> MultiAgentReward:
         # if self.eval_==True:
         #position error is a negative number that increases in magnitude with the error.
         #at the 'worst' it is -600.0
         new_reward = dict({
-            agent_id: (0.01*reward[agent_id]['enemy_position_error']-0.01*reward[agent_id]['position_error'])
-            + (0.01*reward[agent_id]['enemy_position_delay_error']-0.01*reward[agent_id]['position_delay_error'])
-            + (reward[agent_id]['aircraft_share_detected']-reward[agent_id]['enemy_aircraft_share_detected'])
-            + (reward[agent_id]['share_delay_detected']-reward[agent_id]['enemy_share_delay_detected'])
-            + (0.1*reward[agent_id]['radar_share_detected']-0.1*reward[agent_id]['enemy_radar_share_detected'])
-            + (0.1*reward[agent_id]['enemy_share_radar_actions']-0.1*reward[agent_id]['radar_share_detected'])
+            agent_id: (0.00001*(reward[agent_id]['position_error'] - reward[agent_id]['enemy_position_error']))
+            + (0.00001*(reward[agent_id]['position_delay_error'] - reward[agent_id]['enemy_position_delay_error']))
             for agent_id in reward
         })
+        # if self.eval_==True:
+        # new_reward = dict({
+        # agent_id: (0.01*reward[agent_id]['enemy_position_error']-0.01*reward[agent_id]['position_error'])
+        # + (0.01*reward[agent_id]['enemy_position_delay_error']-0.01*reward[agent_id]['position_delay_error'])
+        # + (reward[agent_id]['aircraft_share_detected']-reward[agent_id]['enemy_aircraft_share_detected'])
+        # + (reward[agent_id]['share_delay_detected']-reward[agent_id]['enemy_share_delay_detected'])
+        # + (0.1*reward[agent_id]['radar_share_detected']-0.1*reward[agent_id]['enemy_radar_share_detected'])
+        # + (0.1*reward[agent_id]['enemy_share_radar_actions']-0.1*reward[agent_id]['radar_share_detected'])
+        # for agent_id in reward
+        # })
+        # else:
+        # new_reward = dict({
+        # # agent_id: (0.01*reward[agent_id]['enemy_position_error']-0.01*reward[agent_id]['position_error'])
......
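The new scalarization drops the detection and radar terms and keeps only the position-error objectives, scaled by 1e-5. Because position_error is negative and grows in magnitude with the error (worst case about -600), own-minus-enemy is highest when the team's own error is small and the opponent's is large. Restated as a standalone sketch (same formula as the diff, pulled out of the wrapper class for clarity):

def scalarize(reward: dict) -> dict:
    # Per-agent scalar reward: 1e-5 * (own error - enemy error), for both the
    # immediate and the delayed (after 300 timesteps) position-error objectives.
    return {
        agent_id: 0.00001 * (r['position_error'] - r['enemy_position_error'])
                  + 0.00001 * (r['position_delay_error'] - r['enemy_position_delay_error'])
        for agent_id, r in reward.items()
    }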
@@ -203,7 +203,7 @@ def evaluate_position(state: State) -> tuple[float, float]:
     not_detected = [a_o.aircraft for a_o in state.green if a_o.aircraft.name not in detected_name]
     for not_detected_aircraft in not_detected:
-        dist = 141
+        dist = 600
         dist_red += dist
     dist_green = 0
@@ -219,7 +219,7 @@ def evaluate_position(state: State) -> tuple[float, float]:
     not_detected = [a_o.aircraft for a_o in state.red if a_o.aircraft.name not in detected_name]
     for not_detected_aircraft in not_detected:
-        dist = 141
+        dist = 600
         dist_green += dist
     dist_red = dist_red / (len(state.green) * len(state.red))
......
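Both hunks raise the fixed penalty distance assigned to an undetected aircraft from 141 to 600, which lines up with the "worst case -600" note in the reward wrapper. A simplified, hypothetical sketch of the aggregation these loops feed into (the real function iterates over State objects not shown here):

def mean_distance_with_penalty(detected_dists, n_undetected, n_green, n_red, penalty=600.0):
    # Undetected aircraft contribute a fixed worst-case distance; the total is
    # then averaged over all green/red pairs, as in evaluate_position.
    return (sum(detected_dists) + penalty * n_undetected) / (n_green * n_red)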