diff --git a/configs/config.py b/configs/config.py
index 8b6141b16b661328e7a9d5798472d3809f0a85cc..3c855d9d75234e697cd9287d7d9fb47bf7dbb696 100644
--- a/configs/config.py
+++ b/configs/config.py
@@ -184,7 +184,7 @@ def get_config():
     parser.add_argument("--n_training_threads", type=int,
                         default=1, help="Number of torch threads for training")
     parser.add_argument("--n_rollout_threads", type=int,
-                        default=20, help="Number of parallel envs for training rollouts")
+                        default=30, help="Number of parallel envs for training rollouts")
     parser.add_argument("--n_eval_rollout_threads", type=int,
                         default=1, help="Number of parallel envs for evaluating rollouts")
     parser.add_argument("--n_render_rollout_threads", type=int,
@@ -267,7 +267,7 @@ def get_config():
     parser.add_argument("--clip_param", type=float,
                         default=0.2, help='ppo clip parameter (default: 0.2)')
     parser.add_argument("--num_mini_batch", type=int,
-                        default=20, help='number of batches for ppo (default: 1)')
+                        default=30, help='number of batches for ppo (default: 1)')
     parser.add_argument("--entropy_coef", type=float,
                         default=0.01, help='entropy term coefficient (default: 0.01)')
     parser.add_argument("--value_loss_coef", type=float,
diff --git a/multiagent_rl_train.py b/multiagent_rl_train.py
index f6c3052b031cbc4231262eab7a97a45c3f68cf67..45fced60b861373ffb74ad3cba1b1a2c79982229 100644
--- a/multiagent_rl_train.py
+++ b/multiagent_rl_train.py
@@ -70,17 +70,17 @@ def make_train_env(all_args):
     # }
     reward_wrapper_config = {
         # values= weights, only non zero weights will count to reward
-        "aircraft_share_detected": 0.01, # green detect red without
-        "share_delay_detected": 0.01, # After 300 timesteps, how many red agents detected
-        "share_radar_actions": 0.01, # Number of times radar used
-        "radar_share_detected":0.01, # green detect red with radar
+        "aircraft_share_detected": 0.0, # green detect red without
+        "share_delay_detected": 0.0, # After 300 timesteps, how many red agents detected
+        "share_radar_actions": 0.0, # Number of times radar used
+        "radar_share_detected":0.0, # green detect red with radar
         "position_error": 0.0001, #
         "position_delay_error": 0.0001, #After 300 timesteps,
 
-        'enemy_aircraft_share_detected': 0.01,
-        "enemy_share_delay_detected": 0.01,
-        "enemy_share_radar_actions": 0.01,
-        "enemy_radar_share_detected":0.01,
+        'enemy_aircraft_share_detected': 0.0,
+        "enemy_share_delay_detected": 0.0,
+        "enemy_share_radar_actions": 0.0,
+        "enemy_radar_share_detected":0.0,
         "enemy_position_error": 0.0001,
         "enemy_position_delay_error": 0.0001
     }
@@ -124,19 +124,19 @@ def make_eval_env(all_args):
 
     reward_wrapper_config = {
         # values= weights, only non zero weights will count to reward
-        "aircraft_share_detected": 1, # green detect red without
-        "share_delay_detected": 1, # After 300 timesteps, how many red agents detected
-        "share_radar_actions": 1, # Number of times radar used
-        "radar_share_detected":1, # green detect red with radar
-        "position_error": 1, #
-        "position_delay_error": 1, #After 300 timesteps,
+        "aircraft_share_detected": 0.0, # green detect red without
+        "share_delay_detected": 0.0, # After 300 timesteps, how many red agents detected
+        "share_radar_actions": 0.0, # Number of times radar used
+        "radar_share_detected":0.0, # green detect red with radar
+        "position_error": 0.0001, #
+        "position_delay_error": 0.0001, #After 300 timesteps,
 
-        'enemy_aircraft_share_detected': 1,
-        "enemy_share_delay_detected": 1,
-        "enemy_share_radar_actions": 1,
-        "enemy_radar_share_detected":1,
-        "enemy_position_error": 1,
-        "enemy_position_delay_error": 1
+        'enemy_aircraft_share_detected': 0.0,
+        "enemy_share_delay_detected": 0.0,
+        "enemy_share_radar_actions": 0.0,
+        "enemy_radar_share_detected":0.0,
+        "enemy_position_error": 0.0001,
+        "enemy_position_delay_error": 0.0001
     }
     red_behaviour_name = all_args.red_behaviour
     rand_red_behaviour = all_args.rand_red_behaviour
diff --git a/scalarized_reward_wrapper.py b/scalarized_reward_wrapper.py
index 13c44a2b79746863916925513261a74967897486..bc25885536c47533ec9a5242148583cc5d606105 100644
--- a/scalarized_reward_wrapper.py
+++ b/scalarized_reward_wrapper.py
@@ -44,17 +44,25 @@ class ScalarizedRewardWrapper(gym.RewardWrapper):
 
     def reward(self, reward: MultiAgentMultiObjectiveReward) -> MultiAgentReward:
 
-        # if self.eval_==True:
+        # position error is a negative number that increases in magnitude with the error.
+        # at the 'worst' it is -600.0
         new_reward = dict({
-            agent_id: (0.01*reward[agent_id]['enemy_position_error']-0.01*reward[agent_id]['position_error'])
-            + (0.01*reward[agent_id]['enemy_position_delay_error']-0.01*reward[agent_id]['position_delay_error'])
-            + (reward[agent_id]['aircraft_share_detected']-reward[agent_id]['enemy_aircraft_share_detected'])
-            + (reward[agent_id]['share_delay_detected']-reward[agent_id]['enemy_share_delay_detected'])
-            + (0.1*reward[agent_id]['radar_share_detected']-0.1*reward[agent_id]['enemy_radar_share_detected'])
-            + (0.1*reward[agent_id]['enemy_share_radar_actions']-0.1*reward[agent_id]['radar_share_detected'])
+            agent_id: (0.00001*(reward[agent_id]['position_error'] - reward[agent_id]['enemy_position_error']))
+            + (0.00001*(reward[agent_id]['position_delay_error'] - reward[agent_id]['enemy_position_delay_error']))
             for agent_id in reward
         })
+        # if self.eval_==True:
+        #     new_reward = dict({
+        #         agent_id: (0.01*reward[agent_id]['enemy_position_error']-0.01*reward[agent_id]['position_error'])
+        #         + (0.01*reward[agent_id]['enemy_position_delay_error']-0.01*reward[agent_id]['position_delay_error'])
+        #         + (reward[agent_id]['aircraft_share_detected']-reward[agent_id]['enemy_aircraft_share_detected'])
+        #         + (reward[agent_id]['share_delay_detected']-reward[agent_id]['enemy_share_delay_detected'])
+        #         + (0.1*reward[agent_id]['radar_share_detected']-0.1*reward[agent_id]['enemy_radar_share_detected'])
+        #         + (0.1*reward[agent_id]['enemy_share_radar_actions']-0.1*reward[agent_id]['radar_share_detected'])
+        #         for agent_id in reward
+        #     })
+
         # else:
        #     new_reward = dict({
         #         # agent_id: (0.01*reward[agent_id]['enemy_position_error']-0.01*reward[agent_id]['position_error'])
diff --git a/scenario.py b/scenario.py
index 54a35a9c07ae900132bafac0d1799c4ad101b83a..a713ad88aae401acde7454efd5a2d66d2a5830b5 100644
--- a/scenario.py
+++ b/scenario.py
@@ -203,7 +203,7 @@ def evaluate_position(state: State) -> tuple[float, float]:
 
     not_detected = [a_o.aircraft for a_o in state.green if a_o.aircraft.name not in detected_name]
     for not_detected_aircraft in not_detected:
-        dist = 141
+        dist = 600
         dist_red += dist
 
     dist_green = 0
@@ -219,7 +219,7 @@
 
     not_detected = [a_o.aircraft for a_o in state.red if a_o.aircraft.name not in detected_name]
    for not_detected_aircraft in not_detected:
-        dist = 141
+        dist = 600
         dist_green += dist
 
     dist_red = dist_red / (len(state.green) * len(state.red))