diff --git a/algsel_behaviour.py b/algsel_behaviour.py index 8721b4db818f55b30b7beea5983ea015efb651c5..2a56e4ad6e65ff6ea4211ac985c3c88aea36632d 100644 --- a/algsel_behaviour.py +++ b/algsel_behaviour.py @@ -121,8 +121,8 @@ likelihood[9,7] = 7/60 def update_behaviour_probs(E, pb): pe = np.dot(likelihood[:,E].T, pb) - - pb = np.multiply(likelihood[:,E], pb) / pe + if pe != 0: + pb = np.multiply(likelihood[:,E], pb) / pe return pb diff --git a/behaviour.py b/behaviour.py index db64ae5739679089e1fe446f463f9b2abaf94845..0e3f3645b8b973150e04f322aa4ba7edc0c3b44e 100644 --- a/behaviour.py +++ b/behaviour.py @@ -150,14 +150,23 @@ class Mip(Behaviour): def __init__(self): self.observations = dict() self.common_observations = dict() + self.radar_obs = dict() # used in algsel self.radar_obs_sum = 0 self.observed_aircrafts = set() ''' common_observations = { - r1: ((pos, time), (pos, time)), - r2: ((pos, time)), - r4: ((pos, time), (pos, time)) + r1: { + t1: ((pos, type), (pos, type), (pos, type)) + t2: ((pos, type), (pos, type), (pos, type), (pos, type)) + } + r2: { + t1: ((pos, type), (pos, type)) + t2: ((pos, type)) + } + r4: { + t1: ((pos, type), (pos, type)) + } } ''' @@ -178,30 +187,44 @@ class Mip(Behaviour): for enemy_obs in self.observations[aircraft.aircraft.name][sa.aircraft.name]: # Check if observations should be added to common_observations if enemy_obs[1] <= state.time - 5: + if sa.sa_type == 'radar': + self.observed_aircrafts.add(sa.aircraft.name) + if sa.aircraft.name in self.common_observations: - if sa.observed_time not in [obs[1] for obs in self.common_observations[sa.aircraft.name]]: - self.common_observations[sa.aircraft.name] += ((sa.aircraft.position, sa.observed_time),) + if sa.observed_time in self.common_observations[sa.aircraft.name]: + if (sa.aircraft.position, sa.sa_type) not in self.common_observations[sa.aircraft.name][sa.observed_time]: + self.common_observations[sa.aircraft.name][sa.observed_time] += ((sa.aircraft.position, sa.sa_type),) + else: + self.common_observations[sa.aircraft.name][sa.observed_time] = ((sa.aircraft.position, sa.sa_type),) if sa.aircraft.name not in self.observed_aircrafts and sa.sa_type == 'radar': self.observed_aircrafts.add(sa.aircraft.name) - length = len(self.common_observations[sa.aircraft.name]) - if length > 2: - self.common_observations[sa.aircraft.name] = sorted(self.common_observations[sa.aircraft.name], key=lambda x: x[1]) # check if this can be improved - self.common_observations[sa.aircraft.name] = self.common_observations[sa.aircraft.name][length-2:] + length = len(self.common_observations[sa.aircraft.name]) + if length > 2: + time_steps_to_delete = sorted([t for t in self.common_observations[sa.aircraft.name]], reverse=True)[2:] + for t in time_steps_to_delete: + self.common_observations[sa.aircraft.name].pop(t, None) + else: - self.common_observations[sa.aircraft.name] = ((sa.aircraft.position, sa.observed_time),) + self.common_observations[sa.aircraft.name] = dict() + + time_steps = [obs[1] for obs in self.observations[aircraft.aircraft.name][sa.aircraft.name]] + if sa.observed_time in time_steps: + obs_index = time_steps.index(sa.observed_time) + if self.observations[aircraft.aircraft.name][sa.aircraft.name][obs_index][2] == 'radar' and sa.sa_type == 'aircraft': + self.observations[aircraft.aircraft.name][sa.aircraft.name][obs_index] = (sa.aircraft.position, sa.observed_time, sa.sa_type) - if (sa.aircraft.position, sa.observed_time) in self.observations[aircraft.aircraft.name][sa.aircraft.name]: continue if sa.observed_time == 
state.time and sa.sa_type == 'radar': radar_obs_num.add(sa.aircraft.name) - self.observations[aircraft.aircraft.name][sa.aircraft.name] += ((sa.aircraft.position, sa.observed_time),) + self.observations[aircraft.aircraft.name][sa.aircraft.name] += ((sa.aircraft.position, sa.observed_time, sa.sa_type),) length = len(self.observations[aircraft.aircraft.name][sa.aircraft.name]) + self.observations[aircraft.aircraft.name][sa.aircraft.name] = sorted(self.observations[aircraft.aircraft.name][sa.aircraft.name], key=lambda x: x[1]) + if length > 6: - self.observations[aircraft.aircraft.name][sa.aircraft.name] = sorted(self.observations[aircraft.aircraft.name][sa.aircraft.name], key=lambda x: x[1]) self.observations[aircraft.aircraft.name][sa.aircraft.name] = self.observations[aircraft.aircraft.name][sa.aircraft.name][length-6:] self.radar_obs_sum = len(radar_obs_num) @@ -219,17 +242,16 @@ class Mip(Behaviour): self.actions = actions return self.actions - + class Rl(Behaviour): def __init__(self, state: State, team: str, model=None) -> None: - print(team) if model: self.runner = init(state, model, team) else: - self.runner = init(state, './trained_models/9191_fournights/run1/models', team) + self.runner = init(state, './trained_models/7749_threenights/run1/models', team) def act(self, state: ObservedState, from_algsel=False) -> tuple[RadarAction, ...]: actions = self.runner.update(state) @@ -240,8 +262,8 @@ class Algorithm_selection(Behaviour): def __init__(self, state: State, team: str) -> None: self.mip = Mip() - self.rl_simple_one = Rl(state, './trained_models/9191_fournights/run1/models', team) - self.rl_no = Rl(state, './trained_models/4857_vsNO_4nights/run1/models', team) + self.rl_simple_one = Rl(state, team, './trained_models/7749_threenights/run1/models') + self.rl_no = Rl(state, team, './trained_models/4857_vsNO_4nights/run1/models') self.select_behaviour = { "no": self.rl_no, "simpleone": self.rl_simple_one, diff --git a/configs/model_configs.py b/configs/model_configs.py index e3d45e4766d1cd40be5af3aa6bfbf8a77dfab78a..be1552393b599e8f83d4d00301ead26db0482240 100644 --- a/configs/model_configs.py +++ b/configs/model_configs.py @@ -48,6 +48,13 @@ configs = { 'hidden_size': 64, 'layer_N': 2 }, + './trained_models/7749_threenights/run1/models': { + 'stacked_frames': 100, + 'use_stacked_frames': True, + 'hidden_size': 64, + 'layer_N': 2, + 'actions': [(0, 0), (-75, -25), (-25, 25), (25, 75)] + }, './trained_models/9191_fournights/run1/models': { 'stacked_frames': 100, 'use_stacked_frames': True, diff --git a/scenario.py b/scenario.py index dcf8e544161928bbdf06cc7cccb5149b6e52763a..7eb3d767446236afd18776024e085c4b3054263b 100644 --- a/scenario.py +++ b/scenario.py @@ -13,6 +13,7 @@ import math from typing import Optional, Any import logging import time +import numpy as np from data import ObservedState, State, Aircraft, RadarAction, AircraftObs, AircraftObj, AircraftPos, SaAircraft import constants @@ -283,6 +284,7 @@ def evaluate_position(state: State) -> tuple[float, float]: dist_red = 0 for aircraft_obj in state.red: detected_name = set() + distances = dict() # stores distances to each sa_aircraft in lists to choose the shortest distance if multiple sa_types of same aircraft exists assert len(aircraft_obj.sa) <= len(state.green)*2 for sa_aircraft_obj in aircraft_obj.sa: real_aircraft = aircraft_dict[sa_aircraft_obj.aircraft.name] @@ -290,7 +292,12 @@ def evaluate_position(state: State) -> tuple[float, float]: assert real_aircraft.name == sa_aircraft.name detected_name.add(sa_aircraft.name) 
dist = ((real_aircraft.position.x - sa_aircraft.position.x) ** 2 + (real_aircraft.position.y - sa_aircraft.position.y) ** 2) ** 0.5 - dist_red += dist + if sa_aircraft.name in distances: + distances[sa_aircraft.name].append(dist) + else: + distances[sa_aircraft.name] = [dist] + + dist_red += sum([min(distances[enemy]) for enemy in distances]) not_detected = [a_o.aircraft for a_o in state.green if a_o.aircraft.name not in detected_name] for not_detected_aircraft in not_detected: @@ -300,13 +307,19 @@ def evaluate_position(state: State) -> tuple[float, float]: dist_green = 0 for aircraft_obj in state.green: detected_name = set() + distances = dict() # stores distances to each sa_aircraft in lists to choose the shortest distance if multiple sa_types of same aircraft exists for sa_aircraft_obj in aircraft_obj.sa: real_aircraft = aircraft_dict[sa_aircraft_obj.aircraft.name] sa_aircraft = sa_aircraft_obj.aircraft assert real_aircraft.name == sa_aircraft.name detected_name.add(sa_aircraft.name) dist = ((real_aircraft.position.x - sa_aircraft.position.x) ** 2 + (real_aircraft.position.y - sa_aircraft.position.y) ** 2) ** 0.5 - dist_green += dist + if sa_aircraft.name in distances: + distances[sa_aircraft.name].append(dist) + else: + distances[sa_aircraft.name] = [dist] + + dist_green += sum([min(distances[enemy]) for enemy in distances]) not_detected = [a_o.aircraft for a_o in state.red if a_o.aircraft.name not in detected_name] for not_detected_aircraft in not_detected: @@ -319,6 +332,7 @@ def evaluate_position(state: State) -> tuple[float, float]: return -dist_green, -dist_red + def evaluate_position_delay(state: State, delay=300) -> tuple[float, float]: if state.time >= delay: return evaluate_position(state) @@ -326,6 +340,70 @@ def evaluate_position_delay(state: State, delay=300) -> tuple[float, float]: return 0, 0 +def evaluate_discovered_position(state: State) -> tuple[float, float]: + """ + Calculates the average positional error for detected aircrafts for both teams + """ + aircraft_dict = {aircraft_obj.aircraft.name: aircraft_obj.aircraft for aircraft_obj in state.red} + aircraft_dict.update({aircraft_obj.aircraft.name: aircraft_obj.aircraft for aircraft_obj in state.green}) + + distances_red = dict() + detected_red = set() # contains all names of aircrafts that have been detected by red team + for aircraft_obj in state.red: + assert len(aircraft_obj.sa) <= len(state.green)*2 + for sa_aircraft_obj in aircraft_obj.sa: + real_aircraft = aircraft_dict[sa_aircraft_obj.aircraft.name] + sa_aircraft = sa_aircraft_obj.aircraft + assert real_aircraft.name == sa_aircraft.name + detected_red.add(sa_aircraft.name) + dist = ((real_aircraft.position.x - sa_aircraft.position.x) ** 2 + (real_aircraft.position.y - sa_aircraft.position.y) ** 2) ** 0.5 + if sa_aircraft.name in distances_red: + distances_red[sa_aircraft.name].append(dist) + else: + distances_red[sa_aircraft.name] = [dist] + + + dist_red = sum([min(distances_red[discovered_enemy]) for discovered_enemy in distances_red]) + + distances_green = dict() + detected_green = set() # contains all names of aircrafts that have been detected by green team + for aircraft_obj in state.green: + assert len(aircraft_obj.sa) <= len(state.red)*2 + for sa_aircraft_obj in aircraft_obj.sa: + real_aircraft = aircraft_dict[sa_aircraft_obj.aircraft.name] + sa_aircraft = sa_aircraft_obj.aircraft + assert real_aircraft.name == sa_aircraft.name + detected_green.add(sa_aircraft.name) + dist = ((real_aircraft.position.x - sa_aircraft.position.x) ** 2 + 
(real_aircraft.position.y - sa_aircraft.position.y) ** 2) ** 0.5 + if sa_aircraft.name in distances_green: + distances_green[sa_aircraft.name].append(dist) + else: + distances_green[sa_aircraft.name] = [dist] + + + dist_green = sum([min(distances_green[discovered_enemy]) for discovered_enemy in distances_green]) + + if len(detected_red) == 0: + dist_red = np.inf + else: + dist_red = dist_red / len(detected_red) + + if len(detected_green) == 0: + dist_green = np.inf + else: + dist_green = dist_green / len(detected_green) + + + return -dist_green, -dist_red + +def evaluate_discovered_position_delay(state: State, delay=300) -> tuple[float, float]: + if state.time >= delay: + return evaluate_discovered_position(state) + else: + return -np.inf, -np.inf + + + def evaluate_state(state: State) -> Any: """ green, red = evaluat...() @@ -353,20 +431,34 @@ def evaluate_state(state: State) -> Any: position_delay_error = evaluate_position_delay(state) evaluation_dict["green__position_delay_error"], evaluation_dict["red__position_delay_error"] = position_delay_error + discovered_position_error = evaluate_discovered_position(state) + evaluation_dict["green__discovered_position_error"], evaluation_dict["red__discovered_position_error"] = discovered_position_error + + discovered_position_delay_error = evaluate_discovered_position_delay(state) + evaluation_dict["green__discovered_position_delay_error"], evaluation_dict["red__discovered_position_delay_error"] = discovered_position_delay_error + return evaluation_dict def cumulative_evaluation(eval_dict: dict, cumulative_dict: dict, scenario_scale=1) -> dict: - if cumulative_dict: - assert len(cumulative_dict) == len(eval_dict) - for k, v in eval_dict.items(): - cumulative_dict[k] += v / scenario_scale - else: - for k, v in eval_dict.items(): - cumulative_dict[k] = v / scenario_scale + for k,v in eval_dict.items(): + if k in cumulative_dict: + if k.split('__')[-1] in ['discovered_position_error', 'discovered_position_delay_error']: + if v != -np.inf: + cumulative_dict[k] += v + else: + cumulative_dict[k] += v / scenario_scale + else: + if k.split('__')[-1] in ['discovered_position_error', 'discovered_position_delay_error']: + if v != -np.inf: + cumulative_dict[k] = v + else: + cumulative_dict[k] = v / scenario_scale + return cumulative_dict + def eval_to_csv(evaluation_dict: dict, key_only=False) -> str: eval_str = "" green_behavior = "" @@ -379,9 +471,9 @@ def eval_to_csv(evaluation_dict: dict, key_only=False) -> str: del evaluation_dict['green__behaviour'] del evaluation_dict['red__behaviour'] last_step = True - + for k, v in sorted(evaluation_dict.items(), key=lambda x: x[0]): - if 'red' in k and last_step: + if 'red' in k.split('__')[0] and last_step: eval_str += "," + red_behavior last_step = False if key_only: @@ -406,6 +498,10 @@ def run_scenario(green_behaviour_name: str, red_behaviour_name: str, scenario: S if visualise: tk, canvas = create_canvas() + discovered_time_steps_green = 0 + discovered_time_steps_red = 0 + discovered_time_steps_green_delay = 0 + discovered_time_steps_red_delay = 0 for i in range(constants.SCENARIO_LENGTH): # Green select radar actions green_obs = get_green_observe_state(state) @@ -420,6 +516,18 @@ def run_scenario(green_behaviour_name: str, red_behaviour_name: str, scenario: S logger.info(f"Time step {i}") evaluation_dict = evaluate_state(state) + if evaluation_dict["green__discovered_position_error"] != np.inf: + discovered_time_steps_green += 1 + + if evaluation_dict["red__discovered_position_error"] != np.inf: + 
discovered_time_steps_red += 1 + + if evaluation_dict["green__discovered_position_delay_error"] != np.inf: + discovered_time_steps_green_delay += 1 + + if evaluation_dict["red__discovered_position_delay_error"] != np.inf: + discovered_time_steps_red_delay += 1 + logger.info(f"{evaluation_dict}") cumulative_evaluation_dict = cumulative_evaluation(evaluation_dict, cumulative_evaluation_dict, constants.SCENARIO_LENGTH) @@ -428,6 +536,20 @@ def run_scenario(green_behaviour_name: str, red_behaviour_name: str, scenario: S time.sleep(visualise_delay / 1000) state = step_state(state) + if 'green__discovered_position_delay_error' in cumulative_evaluation_dict: + cumulative_evaluation_dict["green__discovered_position_error"] /= discovered_time_steps_green + cumulative_evaluation_dict["green__discovered_position_delay_error"] /= discovered_time_steps_green_delay + else: + cumulative_evaluation_dict["green__discovered_position_error"] = -np.inf + cumulative_evaluation_dict["green__discovered_position_delay_error"] = -np.inf + + if 'red__discovered_position_delay_error' in cumulative_evaluation_dict: + cumulative_evaluation_dict["red__discovered_position_error"] /= discovered_time_steps_red + cumulative_evaluation_dict["red__discovered_position_delay_error"] /= discovered_time_steps_red_delay + else: + cumulative_evaluation_dict["red__discovered_position_error"] = -np.inf + cumulative_evaluation_dict["red__discovered_position_delay_error"] = -np.inf + logger.info("Cumulative results") for k, v in cumulative_evaluation_dict.items(): logger.info(f" {k}: {v}") @@ -441,3 +563,4 @@ def run_scenario(green_behaviour_name: str, red_behaviour_name: str, scenario: S csv_logger.info(csv_message) logger.info(keys) logger.info(csv_message) + diff --git a/trained_models/7749_threenights/config.py b/trained_models/7749_threenights/config.py new file mode 100644 index 0000000000000000000000000000000000000000..b6d77c3f116b68d7edcd14c52e665565b09badc5 --- /dev/null +++ b/trained_models/7749_threenights/config.py @@ -0,0 +1,330 @@ +import argparse +from behaviour import behaviour_choices + +def get_config(): + """ + The configuration parser for common hyperparameters of all environment. + Please reach each `scripts/train/<env>_runner.py` file to find private hyperparameters + only used in <env>. + + Prepare parameters: + --algorithm_name <algorithm_name> + specifiy the algorithm, including `["happo", "hatrpo"]` + --experiment_name <str> + an identifier to distinguish different experiment. + --seed <int> + set seed for numpy and torch + --seed_specify + by default True Random or specify seed for numpy/torch + --runing_id <int> + the runing index of experiment (default=1) + --cuda + by default True, will use GPU to train; or else will use CPU; + --cuda_deterministic + by default, make sure random seed effective. if set, bypass such function. + --n_training_threads <int> + number of training threads working in parallel. by default 1 + --n_rollout_threads <int> + number of parallel envs for training rollout. by default 32 + --n_eval_rollout_threads <int> + number of parallel envs for evaluating rollout. by default 1 + --n_render_rollout_threads <int> + number of parallel envs for rendering, could only be set as 1 for some environments. + --num_env_steps <int> + number of env steps to train (default: 10e6) + + + Env parameters: + --env_name <str> + specify the name of environment + --use_obs_instead_of_state + [only for some env] by default False, will use global state; or else will use concatenated local obs. 
+ + Replay Buffer parameters: + --episode_length <int> + the max length of an episode in the buffer. + + Network parameters: + --share_policy + by default True, all agents will share the same network; set to make training agents use different policies. + --use_centralized_V + by default True, use centralized training mode; or else will use decentralized training mode. + --stacked_frames <int> + Number of input frames which should be stacked together. + --hidden_size <int> + Dimension of hidden layers for actor/critic networks + --layer_N <int> + Number of layers for actor/critic networks + --use_ReLU + by default True, will use ReLU; or else will use Tanh. + --use_popart + by default True, use running mean and std to normalize rewards. + --use_feature_normalization + by default True, apply layernorm to normalize inputs. + --use_orthogonal + by default True, use Orthogonal initialization for weights and 0 initialization for biases; or else, will use xavier uniform initialization. + --gain + by default 0.01, use the gain # of last action layer + --use_naive_recurrent_policy + by default False, use the whole trajectory to calculate hidden states. + --use_recurrent_policy + by default, use Recurrent Policy. If set, do not use. + --recurrent_N <int> + The number of recurrent layers (default: 1). + --data_chunk_length <int> + Time length of chunks used to train a recurrent_policy, default 10. + + Optimizer parameters: + --lr <float> + learning rate parameter (default: 5e-4, fixed). + --critic_lr <float> + learning rate of critic (default: 5e-4, fixed) + --opti_eps <float> + RMSprop optimizer epsilon (default: 1e-5) + --weight_decay <float> + coefficient of weight decay (default: 0) + + TRPO parameters: + --kl_threshold <float> + the threshold of kl-divergence (default: 0.01) + --ls_step <int> + the number of line search steps (default: 10) + --accept_ratio <float> + accept ratio of loss improvement (default: 0.5) + + PPO parameters: + --ppo_epoch <int> + number of ppo epochs (default: 15) + --use_clipped_value_loss + by default, clip loss value. If set, do not clip loss value. + --clip_param <float> + ppo clip parameter (default: 0.2) + --num_mini_batch <int> + number of batches for ppo (default: 1) + --entropy_coef <float> + entropy term coefficient (default: 0.01) + --use_max_grad_norm + by default, use max norm of gradients. If set, do not use. + --max_grad_norm <float> + max norm of gradients (default: 0.5) + --use_gae + by default, use generalized advantage estimation. If set, do not use gae. + --gamma <float> + discount factor for rewards (default: 0.99) + --gae_lambda <float> + gae lambda parameter (default: 0.95) + --use_proper_time_limits + by default, compute returns without taking time limits into account. If set, compute returns considering time limits. + --use_huber_loss + by default, use huber loss. If set, do not use huber loss. + --use_value_active_masks + by default True, whether to mask useless data in value loss. + --huber_delta <float> + coefficient of huber loss. + + + Run parameters: + --use_linear_lr_decay + by default, do not apply linear decay to learning rate. If set, use a linear schedule on the learning rate + --save_interval <int> + time duration between two consecutive model saves. + --log_interval <int> + time duration between two consecutive log prints. + --model_dir <str> + by default None. Set the path to a pretrained model. + + Eval parameters: + --use_eval + by default, do not start evaluation. If set, start evaluation alongside training.
+ --eval_interval <int> + time duration between contiunous twice evaluation progress. + --eval_episodes <int> + number of episodes of a single evaluation. + + Render parameters: + --save_gifs + by default, do not save render video. If set, save video. + --use_render + by default, do not render the env during training. If set, start render. Note: something, the environment has internal render process which is not controlled by this hyperparam. + --render_episodes <int> + the number of episodes to render a given env + --ifi <float> + the play interval of each rendered image in saved video. + + Pretrained parameters: + + """ + parser = argparse.ArgumentParser(description='onpolicy_algorithm', formatter_class=argparse.RawDescriptionHelpFormatter) + + # prepare parameters + parser.add_argument("--algorithm_name", type=str, + default='happo', choices=["happo","hatrpo"]) + parser.add_argument("--experiment_name", type=str, + default="check", help="an identifier to distinguish different experiment.") + parser.add_argument("--seed", type=int, + default=1, help="Random seed for numpy/torch") + parser.add_argument("--seed_specify", action="store_true", + default=False, help="Random or specify seed for numpy/torch") + parser.add_argument("--red_behaviour", choices=behaviour_choices, type=str.lower, + default="simpleone", help="name of red behaviour to use for training. Ignored if rand_red_behaviour is True.") + parser.add_argument("--rand_red_behaviour", type=bool, + default=True, help="Randomizes opponent behaviour in each simulation for training. Uses red_behaviour if False.") + parser.add_argument("--rand_size", type=bool, + default=False, help="Randomizes both teams, teamsize in each simulation for training.") + parser.add_argument("--red_size", type=int, + default=7, help="2<=Size of red team<=7, works only with rand_size==false") + parser.add_argument("--green_size", type=int, + default=7, help="2<=Size of green team<=7, works only with rand_size==false") + parser.add_argument("--runing_id", type=int, + default=1, help="the runing index of experiment") + parser.add_argument("--cuda", action='store_false', + default=True, help="by default True, will use GPU to train; or else will use CPU;") + parser.add_argument("--cuda_deterministic", action='store_false', + default=True, help="by default, make sure random seed effective. 
if set, bypass such function.") + parser.add_argument("--n_training_threads", type=int, + default=1, help="Number of torch threads for training") + parser.add_argument("--n_rollout_threads", type=int, + default=20, help="Number of parallel envs for training rollouts") + parser.add_argument("--n_eval_rollout_threads", type=int, + default=1, help="Number of parallel envs for evaluating rollouts") + parser.add_argument("--n_render_rollout_threads", type=int, + default=1, help="Number of parallel envs for rendering rollouts") + parser.add_argument("--num_env_steps", type=int, + default=60000000, help='Number of environment steps to train (default: 6000)') + parser.add_argument("--user_name", type=str, + default='marl',help="[for wandb usage], to specify user's name for simply collecting training data.") + # env parameters + parser.add_argument("--env_name", type=str, + default='scontrol', help="specify the name of environment") + parser.add_argument("--use_obs_instead_of_state", action='store_true', + default=False, help="Whether to use global state or concatenated obs") + + # replay buffer parameters + parser.add_argument("--episode_length", type=int, + default=600, help="Max length for any episode") + + # network parameters + parser.add_argument("--share_policy", action='store_false', + default=True, help='Whether agent share the same policy') + parser.add_argument("--use_centralized_V", action='store_false', + default=True, help="Whether to use centralized V function") + parser.add_argument("--stacked_frames", type=int, + default=100, help="Dimension of hidden layers for actor/critic networks") + parser.add_argument("--use_stacked_frames", action='store_true', + default=True, help="Whether to use stacked_frames") + parser.add_argument("--hidden_size", type=int, + default=64, help="Dimension of hidden layers for actor/critic networks") + parser.add_argument("--layer_N", type=int, + default=2, help="Number of layers for actor/critic networks") + parser.add_argument("--use_ReLU", action='store_false', + default=True, help="Whether to use ReLU") + parser.add_argument("--use_popart", action='store_false', + default=True, help="by default True, use running mean and std to normalize rewards.") + parser.add_argument("--use_valuenorm", action='store_false', + default=True, help="by default True, use running mean and std to normalize rewards.") + parser.add_argument("--use_feature_normalization", action='store_false', + default=True, help="Whether to apply layernorm to the inputs") + parser.add_argument("--use_orthogonal", action='store_false', + default=True, help="Whether to use Orthogonal initialization for weights and 0 initialization for biases") + parser.add_argument("--gain", type=float, + default=0.01, help="The gain # of last action layer") + + # recurrent parameters + parser.add_argument("--use_naive_recurrent_policy", action='store_true', + default=False, help='Whether to use a naive recurrent policy') + parser.add_argument("--use_recurrent_policy", action='store_true', + default=False, help='use a recurrent policy') + parser.add_argument("--recurrent_N", type=int, + default=1, help="The number of recurrent layers.") + parser.add_argument("--data_chunk_length", type=int, + default=10, help="Time length of chunks used to train a recurrent_policy") + + # optimizer parameters + parser.add_argument("--lr", type=float, + default=1e-4, help='learning rate (default: 5e-4)') + parser.add_argument("--critic_lr", type=float, + default=1e-4, help='critic learning rate (default: 5e-4)') + 
parser.add_argument("--opti_eps", type=float, + default=1e-5, help='RMSprop optimizer epsilon (default: 1e-5)') + parser.add_argument("--weight_decay", type=float, default=0) + parser.add_argument("--std_x_coef", type=float, default=1) + parser.add_argument("--std_y_coef", type=float, default=0.5) + + + # trpo parameters + parser.add_argument("--kl_threshold", type=float, + default=0.01, help='the threshold of kl-divergence (default: 0.01)') + parser.add_argument("--ls_step", type=int, + default=10, help='number of line search (default: 10)') + parser.add_argument("--accept_ratio", type=float, + default=0.5, help='accept ratio of loss improve (default: 0.5)') + + # ppo parameters + parser.add_argument("--ppo_epoch", type=int, + default=15, help='number of ppo epochs (default: 15)') + parser.add_argument("--use_clipped_value_loss", action='store_false', + default=True, help="by default, clip loss value. If set, do not clip loss value.") + parser.add_argument("--clip_param", type=float, + default=0.2, help='ppo clip parameter (default: 0.2)') + parser.add_argument("--num_mini_batch", type=int, + default=1, help='number of batches for ppo (default: 1)') + parser.add_argument("--entropy_coef", type=float, + default=0.01, help='entropy term coefficient (default: 0.01)') + parser.add_argument("--value_loss_coef", type=float, + default=0.01, help='value loss coefficient (default: 0.5)') + parser.add_argument("--use_max_grad_norm", action='store_false', + default=True, help="by default, use max norm of gradients. If set, do not use.") + parser.add_argument("--max_grad_norm", type=float, + default=10.0, help='max norm of gradients (default: 0.5)') + parser.add_argument("--use_gae", action='store_false', + default=True, help='use generalized advantage estimation') + parser.add_argument("--gamma", type=float, default=0.999999999, + help='discount factor for rewards (default: 0.99)') + parser.add_argument("--gae_lambda", type=float, default=0.95, + help='gae lambda parameter (default: 0.95)') + parser.add_argument("--use_proper_time_limits", action='store_true', + default=False, help='compute returns taking into account time limits') + parser.add_argument("--use_huber_loss", action='store_false', + default=True, help="by default, use huber loss. If set, do not use huber loss.") + parser.add_argument("--use_value_active_masks", action='store_false', + default=True, help="by default True, whether to mask useless data in value loss.") + parser.add_argument("--use_policy_active_masks", action='store_false', + default=True, help="by default True, whether to mask useless data in policy loss.") + parser.add_argument("--huber_delta", type=float, + default=10.0, help=" coefficience of huber loss.") + + # run parameters + parser.add_argument("--use_linear_lr_decay", action='store_true', + default=False, help='use a linear schedule on the learning rate') + parser.add_argument("--save_interval", type=int, + default=1, help="time duration between contiunous twice models saving.") + parser.add_argument("--log_interval", type=int, + default=1, help="time duration between contiunous twice log printing.") + parser.add_argument("--model_dir", type=str, + default="./trained_models/7239_twonights/run1/models", help="by default None. set the path to pretrained model.") + + # eval parameters + parser.add_argument("--use_eval", action='store_true', + default=True, help="by default, do not start evaluation. 
If set`, start evaluation alongside with training.") + parser.add_argument("--eval_interval", type=int, + default=10, help="time duration between contiunous twice evaluation progress.") + parser.add_argument("--eval_episodes", type=int, + default=2, help="number of episodes of a single evaluation.") + parser.add_argument("--eval_render", type=bool, + default=False, help="visualizes the model every eval. works on top of the 'eval' setting in multiagent_rl_train.") + parser.add_argument("--visualise_delay", type=int, + default=1, help="numer of milliseconds to wait between steps when drawing eval render.") + parser.add_argument("--only_delay_middle", type=bool, + default=True, help="uses 0 delay for time < 150 and time > 350, and visualise delay for the middle") + + # render parameters + parser.add_argument("--save_gifs", action='store_true', + default=False, help="by default, do not save render video. If set, save video.") + parser.add_argument("--use_render", action='store_true', + default=False, help="by default, do not render the env during training. If set, start render. Note: something, the environment has internal render process which is not controlled by this hyperparam.") + parser.add_argument("--render_episodes", type=int, + default=5, help="the number of episodes to render a given env") + parser.add_argument("--ifi", type=float, + default=0.1, help="the play interval of each rendered image in saved video.") + + return parser \ No newline at end of file diff --git a/trained_models/7749_threenights/reward b/trained_models/7749_threenights/reward new file mode 100644 index 0000000000000000000000000000000000000000..de5eaace25ee018032d05f719a55edb6c529277b --- /dev/null +++ b/trained_models/7749_threenights/reward @@ -0,0 +1,10 @@ +agent_id: + reward[agent_id]['share_radar_actions'] + reward[agent_id]['detected_with_no_radar'] + + reward[agent_id]['detected_multiple'] + + (0.01*reward[agent_id]['position_error'] - 0.01*reward[agent_id]['enemy_position_error']) +eval: agent_id: (0.01*reward[agent_id]['position_error'] - 0.01*reward[agent_id]['enemy_position_error']) + + (0.01*reward[agent_id]['position_delay_error'] - 0.01*reward[agent_id]['enemy_position_delay_error']) + + (reward[agent_id]['aircraft_share_detected']-reward[agent_id]['enemy_aircraft_share_detected']) + + (reward[agent_id]['aircraft_share_delay_detected']-reward[agent_id]['enemy_aircraft_share_delay_detected']) + + (0.1*reward[agent_id]['radar_share_detected']-0.1*reward[agent_id]['enemy_radar_share_detected']) + + (0.1*reward[agent_id]['radar_share_delay_detected']-0.1*reward[agent_id]['enemy_radar_share_delay_detected']) + + (reward[agent_id]['share_radar_actions'] - reward[agent_id]['enemy_share_radar_actions']) diff --git a/trained_models/7749_threenights/run1/logs/agent0/actor_grad_norm/agent0/actor_grad_norm/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent0/actor_grad_norm/agent0/actor_grad_norm/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..8ab4ce1d77e299e6b580a7723944eaf63a5b6e39 Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent0/actor_grad_norm/agent0/actor_grad_norm/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent0/average_step_adversarial_rewards/agent0/average_step_adversarial_rewards/events.out.tfevents.1670334143.maiex1-104 
b/trained_models/7749_threenights/run1/logs/agent0/average_step_adversarial_rewards/agent0/average_step_adversarial_rewards/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..ae49b048f31cab5e0b45f6c4403dfed7676bf136 Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent0/average_step_adversarial_rewards/agent0/average_step_adversarial_rewards/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent0/average_step_rewards/agent0/average_step_rewards/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent0/average_step_rewards/agent0/average_step_rewards/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..10d9dc739d700b8de4f1bb3f6cd9c4c03f483c24 Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent0/average_step_rewards/agent0/average_step_rewards/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent0/critic_grad_norm/agent0/critic_grad_norm/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent0/critic_grad_norm/agent0/critic_grad_norm/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..78b10013850499c3bcce28e0b5f5b7d50c8d0876 Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent0/critic_grad_norm/agent0/critic_grad_norm/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent0/dist_entropy/agent0/dist_entropy/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent0/dist_entropy/agent0/dist_entropy/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..603cc830255d3aab260e443208a0025a2b7be981 Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent0/dist_entropy/agent0/dist_entropy/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent0/policy_loss/agent0/policy_loss/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent0/policy_loss/agent0/policy_loss/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..46ecf2eceaa4ebeab450f0ac5416c965939679d2 Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent0/policy_loss/agent0/policy_loss/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent0/ratio/agent0/ratio/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent0/ratio/agent0/ratio/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..198cc7f053c4bbcb8499915b6cd6cdd6c35775e0 Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent0/ratio/agent0/ratio/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent0/value_loss/agent0/value_loss/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent0/value_loss/agent0/value_loss/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..d47170464fa229b9709753970736a7aa18712160 Binary files /dev/null 
and b/trained_models/7749_threenights/run1/logs/agent0/value_loss/agent0/value_loss/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent1/actor_grad_norm/agent1/actor_grad_norm/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent1/actor_grad_norm/agent1/actor_grad_norm/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..8c5fe809f8e1f79c7a9f75152b58c132c8ca0f3a Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent1/actor_grad_norm/agent1/actor_grad_norm/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent1/average_step_adversarial_rewards/agent1/average_step_adversarial_rewards/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent1/average_step_adversarial_rewards/agent1/average_step_adversarial_rewards/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..b154a84876d7aa103b692854c344251c7bec0a26 Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent1/average_step_adversarial_rewards/agent1/average_step_adversarial_rewards/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent1/average_step_rewards/agent1/average_step_rewards/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent1/average_step_rewards/agent1/average_step_rewards/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..25da8205c6dbc51721c69349a667b0f8fd76f697 Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent1/average_step_rewards/agent1/average_step_rewards/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent1/critic_grad_norm/agent1/critic_grad_norm/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent1/critic_grad_norm/agent1/critic_grad_norm/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..da4e7263988c7359e255176102046155062530c5 Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent1/critic_grad_norm/agent1/critic_grad_norm/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent1/dist_entropy/agent1/dist_entropy/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent1/dist_entropy/agent1/dist_entropy/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..a1e3cd6fc364e67aaa2b074e8d2169db7f8acf2e Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent1/dist_entropy/agent1/dist_entropy/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent1/policy_loss/agent1/policy_loss/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent1/policy_loss/agent1/policy_loss/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..1acc3c66ed0c0cecc4c876b4d814519a3959e8e5 Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent1/policy_loss/agent1/policy_loss/events.out.tfevents.1670334143.maiex1-104 differ diff --git 
a/trained_models/7749_threenights/run1/logs/agent1/ratio/agent1/ratio/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent1/ratio/agent1/ratio/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..9920d97a5636a03a7326aa6d90e23b190a77238e Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent1/ratio/agent1/ratio/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent1/value_loss/agent1/value_loss/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent1/value_loss/agent1/value_loss/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..14ac5a25c91d39f444f52976bad46faee701e0f3 Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent1/value_loss/agent1/value_loss/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent2/actor_grad_norm/agent2/actor_grad_norm/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent2/actor_grad_norm/agent2/actor_grad_norm/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..6cc33eac155f4332b21c98a61d8d747972639258 Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent2/actor_grad_norm/agent2/actor_grad_norm/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent2/average_step_adversarial_rewards/agent2/average_step_adversarial_rewards/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent2/average_step_adversarial_rewards/agent2/average_step_adversarial_rewards/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..78915971a25003d361436cafbb9c64a503854a03 Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent2/average_step_adversarial_rewards/agent2/average_step_adversarial_rewards/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent2/average_step_rewards/agent2/average_step_rewards/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent2/average_step_rewards/agent2/average_step_rewards/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..81e740548cf41f8b4f7dbf545aef7452802f14fb Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent2/average_step_rewards/agent2/average_step_rewards/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent2/critic_grad_norm/agent2/critic_grad_norm/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent2/critic_grad_norm/agent2/critic_grad_norm/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..12327f66769506b67deb443091f51ceba8189932 Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent2/critic_grad_norm/agent2/critic_grad_norm/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent2/dist_entropy/agent2/dist_entropy/events.out.tfevents.1670334143.maiex1-104 
b/trained_models/7749_threenights/run1/logs/agent2/dist_entropy/agent2/dist_entropy/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..d5a4a71c332bfe1feef31f44e321511c5cae28c8 Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent2/dist_entropy/agent2/dist_entropy/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent2/policy_loss/agent2/policy_loss/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent2/policy_loss/agent2/policy_loss/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..62bcb3f3951d61f64bd86ae7d6a750b6a3857fd1 Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent2/policy_loss/agent2/policy_loss/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent2/ratio/agent2/ratio/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent2/ratio/agent2/ratio/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..08694010ea4b5739b5d6a9810746f5a9d2b08d8e Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent2/ratio/agent2/ratio/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent2/value_loss/agent2/value_loss/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent2/value_loss/agent2/value_loss/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..d0c0cc5ce1f4643666f191818c76a600691072c5 Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent2/value_loss/agent2/value_loss/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent3/actor_grad_norm/agent3/actor_grad_norm/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent3/actor_grad_norm/agent3/actor_grad_norm/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..9ffc02d15850c322fc812c2eae4ee9e345b54182 Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent3/actor_grad_norm/agent3/actor_grad_norm/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent3/average_step_adversarial_rewards/agent3/average_step_adversarial_rewards/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent3/average_step_adversarial_rewards/agent3/average_step_adversarial_rewards/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..c87aee624d800a35e1a24e4a23f5fe1d967cba68 Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent3/average_step_adversarial_rewards/agent3/average_step_adversarial_rewards/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent3/average_step_rewards/agent3/average_step_rewards/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent3/average_step_rewards/agent3/average_step_rewards/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..13ab86ff984a5cd6eb7f9c6d299e4a7e024ebfc7 Binary 
files /dev/null and b/trained_models/7749_threenights/run1/logs/agent3/average_step_rewards/agent3/average_step_rewards/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent3/critic_grad_norm/agent3/critic_grad_norm/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent3/critic_grad_norm/agent3/critic_grad_norm/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..86f412ae47ab45683e86e8150e6989c4a496ffa6 Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent3/critic_grad_norm/agent3/critic_grad_norm/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent3/dist_entropy/agent3/dist_entropy/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent3/dist_entropy/agent3/dist_entropy/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..f00c313f7893d938fb0110f11b24208be5b833e4 Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent3/dist_entropy/agent3/dist_entropy/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent3/policy_loss/agent3/policy_loss/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent3/policy_loss/agent3/policy_loss/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..9cc9a0ab81129078e661df20cfa3ae97de620d0e Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent3/policy_loss/agent3/policy_loss/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent3/ratio/agent3/ratio/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent3/ratio/agent3/ratio/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..6aaaab58116694033a9802a1549660b3707dcb4a Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent3/ratio/agent3/ratio/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent3/value_loss/agent3/value_loss/events.out.tfevents.1670334143.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent3/value_loss/agent3/value_loss/events.out.tfevents.1670334143.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..c0a64008fd9db78071ea414f0356895827fd1a85 Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent3/value_loss/agent3/value_loss/events.out.tfevents.1670334143.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent4/actor_grad_norm/agent4/actor_grad_norm/events.out.tfevents.1670334144.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent4/actor_grad_norm/agent4/actor_grad_norm/events.out.tfevents.1670334144.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..d55053bf311c985b2eda596413c4b3a410d6e78e Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent4/actor_grad_norm/agent4/actor_grad_norm/events.out.tfevents.1670334144.maiex1-104 differ diff --git a/trained_models/7749_threenights/run1/logs/agent4/average_step_adversarial_rewards/agent4/average_step_adversarial_rewards/events.out.tfevents.1670334144.maiex1-104 
b/trained_models/7749_threenights/run1/logs/agent4/average_step_adversarial_rewards/agent4/average_step_adversarial_rewards/events.out.tfevents.1670334144.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..ab1e5206ae2c11f08d57ec4ff3dd300880ddbffc
Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent4/average_step_adversarial_rewards/agent4/average_step_adversarial_rewards/events.out.tfevents.1670334144.maiex1-104 differ
diff --git a/trained_models/7749_threenights/run1/logs/agent4/average_step_rewards/agent4/average_step_rewards/events.out.tfevents.1670334144.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent4/average_step_rewards/agent4/average_step_rewards/events.out.tfevents.1670334144.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..7714b4d8dee3dbb51dd91980b74128d82168a1d4
Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent4/average_step_rewards/agent4/average_step_rewards/events.out.tfevents.1670334144.maiex1-104 differ
diff --git a/trained_models/7749_threenights/run1/logs/agent4/critic_grad_norm/agent4/critic_grad_norm/events.out.tfevents.1670334144.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent4/critic_grad_norm/agent4/critic_grad_norm/events.out.tfevents.1670334144.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..af0d4d7ca62b7b8d5ea6003912c6523867692997
Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent4/critic_grad_norm/agent4/critic_grad_norm/events.out.tfevents.1670334144.maiex1-104 differ
diff --git a/trained_models/7749_threenights/run1/logs/agent4/dist_entropy/agent4/dist_entropy/events.out.tfevents.1670334144.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent4/dist_entropy/agent4/dist_entropy/events.out.tfevents.1670334144.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..e2d09253b6b5376e18a3440f900f9bf98d6fdd41
Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent4/dist_entropy/agent4/dist_entropy/events.out.tfevents.1670334144.maiex1-104 differ
diff --git a/trained_models/7749_threenights/run1/logs/agent4/policy_loss/agent4/policy_loss/events.out.tfevents.1670334144.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent4/policy_loss/agent4/policy_loss/events.out.tfevents.1670334144.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..b66428810b6a01e274818686c9b83fab4c4afb1f
Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent4/policy_loss/agent4/policy_loss/events.out.tfevents.1670334144.maiex1-104 differ
diff --git a/trained_models/7749_threenights/run1/logs/agent4/ratio/agent4/ratio/events.out.tfevents.1670334144.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent4/ratio/agent4/ratio/events.out.tfevents.1670334144.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..0c0327615a84978c60ba34d10cea1fa9e681c4f3
Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent4/ratio/agent4/ratio/events.out.tfevents.1670334144.maiex1-104 differ
diff --git a/trained_models/7749_threenights/run1/logs/agent4/value_loss/agent4/value_loss/events.out.tfevents.1670334144.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent4/value_loss/agent4/value_loss/events.out.tfevents.1670334144.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..86c7461014da09d2b0f9acfcff64054a026c00ef
Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent4/value_loss/agent4/value_loss/events.out.tfevents.1670334144.maiex1-104 differ
diff --git a/trained_models/7749_threenights/run1/logs/agent5/actor_grad_norm/agent5/actor_grad_norm/events.out.tfevents.1670334144.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent5/actor_grad_norm/agent5/actor_grad_norm/events.out.tfevents.1670334144.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..49201d74eccc6e93fe07083cbaa1c83c27ef31c8
Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent5/actor_grad_norm/agent5/actor_grad_norm/events.out.tfevents.1670334144.maiex1-104 differ
diff --git a/trained_models/7749_threenights/run1/logs/agent5/average_step_adversarial_rewards/agent5/average_step_adversarial_rewards/events.out.tfevents.1670334144.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent5/average_step_adversarial_rewards/agent5/average_step_adversarial_rewards/events.out.tfevents.1670334144.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..9faa7a7ae4e5e1a37a63c81f12a5bb9ed9248678
Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent5/average_step_adversarial_rewards/agent5/average_step_adversarial_rewards/events.out.tfevents.1670334144.maiex1-104 differ
diff --git a/trained_models/7749_threenights/run1/logs/agent5/average_step_rewards/agent5/average_step_rewards/events.out.tfevents.1670334144.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent5/average_step_rewards/agent5/average_step_rewards/events.out.tfevents.1670334144.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..7f643b54f50a575b0f461addd1773e6b0252b935
Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent5/average_step_rewards/agent5/average_step_rewards/events.out.tfevents.1670334144.maiex1-104 differ
diff --git a/trained_models/7749_threenights/run1/logs/agent5/critic_grad_norm/agent5/critic_grad_norm/events.out.tfevents.1670334144.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent5/critic_grad_norm/agent5/critic_grad_norm/events.out.tfevents.1670334144.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..0e4cd7402e43f208589516fe783c9e36429520e5
Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent5/critic_grad_norm/agent5/critic_grad_norm/events.out.tfevents.1670334144.maiex1-104 differ
diff --git a/trained_models/7749_threenights/run1/logs/agent5/dist_entropy/agent5/dist_entropy/events.out.tfevents.1670334144.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent5/dist_entropy/agent5/dist_entropy/events.out.tfevents.1670334144.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..324ee2e88b38994bf601f49e93db179004b8481e
Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent5/dist_entropy/agent5/dist_entropy/events.out.tfevents.1670334144.maiex1-104 differ
diff --git a/trained_models/7749_threenights/run1/logs/agent5/policy_loss/agent5/policy_loss/events.out.tfevents.1670334144.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent5/policy_loss/agent5/policy_loss/events.out.tfevents.1670334144.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..04e7d30f5db560960d51c579fb1aab8f8bbbecad
Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent5/policy_loss/agent5/policy_loss/events.out.tfevents.1670334144.maiex1-104 differ
diff --git a/trained_models/7749_threenights/run1/logs/agent5/ratio/agent5/ratio/events.out.tfevents.1670334144.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent5/ratio/agent5/ratio/events.out.tfevents.1670334144.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..2557e0ae963733d0e73736a5674cb8e9bdf1b536
Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent5/ratio/agent5/ratio/events.out.tfevents.1670334144.maiex1-104 differ
diff --git a/trained_models/7749_threenights/run1/logs/agent5/value_loss/agent5/value_loss/events.out.tfevents.1670334144.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent5/value_loss/agent5/value_loss/events.out.tfevents.1670334144.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..3be6aa7f5d0da90947de002a3f82d39caca08434
Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent5/value_loss/agent5/value_loss/events.out.tfevents.1670334144.maiex1-104 differ
diff --git a/trained_models/7749_threenights/run1/logs/agent6/actor_grad_norm/agent6/actor_grad_norm/events.out.tfevents.1670334144.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent6/actor_grad_norm/agent6/actor_grad_norm/events.out.tfevents.1670334144.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..81aaa65cdef9532615b2510323d81d1bc4806d47
Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent6/actor_grad_norm/agent6/actor_grad_norm/events.out.tfevents.1670334144.maiex1-104 differ
diff --git a/trained_models/7749_threenights/run1/logs/agent6/average_step_adversarial_rewards/agent6/average_step_adversarial_rewards/events.out.tfevents.1670334144.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent6/average_step_adversarial_rewards/agent6/average_step_adversarial_rewards/events.out.tfevents.1670334144.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..7f18a346db327983ea489344c8f22e24d22563fe
Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent6/average_step_adversarial_rewards/agent6/average_step_adversarial_rewards/events.out.tfevents.1670334144.maiex1-104 differ
diff --git a/trained_models/7749_threenights/run1/logs/agent6/average_step_rewards/agent6/average_step_rewards/events.out.tfevents.1670334144.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent6/average_step_rewards/agent6/average_step_rewards/events.out.tfevents.1670334144.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..5f8dfeeeaaa39a34444ca2c806a4ead40062363d
Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent6/average_step_rewards/agent6/average_step_rewards/events.out.tfevents.1670334144.maiex1-104 differ
diff --git a/trained_models/7749_threenights/run1/logs/agent6/critic_grad_norm/agent6/critic_grad_norm/events.out.tfevents.1670334144.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent6/critic_grad_norm/agent6/critic_grad_norm/events.out.tfevents.1670334144.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..2eec60c422a1979f286ebe47fc9ac0fc4b15eda5
Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent6/critic_grad_norm/agent6/critic_grad_norm/events.out.tfevents.1670334144.maiex1-104 differ
diff --git a/trained_models/7749_threenights/run1/logs/agent6/dist_entropy/agent6/dist_entropy/events.out.tfevents.1670334144.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent6/dist_entropy/agent6/dist_entropy/events.out.tfevents.1670334144.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..07fadb73272d559cba78436105e1c4c7201923a2
Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent6/dist_entropy/agent6/dist_entropy/events.out.tfevents.1670334144.maiex1-104 differ
diff --git a/trained_models/7749_threenights/run1/logs/agent6/policy_loss/agent6/policy_loss/events.out.tfevents.1670334144.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent6/policy_loss/agent6/policy_loss/events.out.tfevents.1670334144.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..fef8a415d7f2bbc734110c9329298883c667ce51
Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent6/policy_loss/agent6/policy_loss/events.out.tfevents.1670334144.maiex1-104 differ
diff --git a/trained_models/7749_threenights/run1/logs/agent6/ratio/agent6/ratio/events.out.tfevents.1670334144.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent6/ratio/agent6/ratio/events.out.tfevents.1670334144.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..842cc293996c1ba7fac378ce0e793f6e080be229
Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent6/ratio/agent6/ratio/events.out.tfevents.1670334144.maiex1-104 differ
diff --git a/trained_models/7749_threenights/run1/logs/agent6/value_loss/agent6/value_loss/events.out.tfevents.1670334144.maiex1-104 b/trained_models/7749_threenights/run1/logs/agent6/value_loss/agent6/value_loss/events.out.tfevents.1670334144.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..679ba61b6c349674a6a4a6597099cc1a43615082
Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/agent6/value_loss/agent6/value_loss/events.out.tfevents.1670334144.maiex1-104 differ
diff --git a/trained_models/7749_threenights/run1/logs/eval_average_episode_rewards/eval_average_episode_rewards/events.out.tfevents.1670335493.maiex1-104 b/trained_models/7749_threenights/run1/logs/eval_average_episode_rewards/eval_average_episode_rewards/events.out.tfevents.1670335493.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..c302aa6cddda84f65d18f0aa5a46dab0fec0f80b
Binary files /dev/null and b/trained_models/7749_threenights/run1/logs/eval_average_episode_rewards/eval_average_episode_rewards/events.out.tfevents.1670335493.maiex1-104 differ
diff --git a/trained_models/7749_threenights/run1/logs/events.out.tfevents.1670333987.maiex1-104 b/trained_models/7749_threenights/run1/logs/events.out.tfevents.1670333987.maiex1-104
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/trained_models/7749_threenights/run1/models/actor_agent0.pt b/trained_models/7749_threenights/run1/models/actor_agent0.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e790516d1f9eea9a631065357d60e767c3493d42
Binary files /dev/null and b/trained_models/7749_threenights/run1/models/actor_agent0.pt differ
diff --git a/trained_models/7749_threenights/run1/models/actor_agent1.pt b/trained_models/7749_threenights/run1/models/actor_agent1.pt
new file mode 100644
index 0000000000000000000000000000000000000000..56271cfa2ee62c8fc13b1f34e36da70db739eb6a
Binary files /dev/null and b/trained_models/7749_threenights/run1/models/actor_agent1.pt differ
diff --git a/trained_models/7749_threenights/run1/models/actor_agent2.pt b/trained_models/7749_threenights/run1/models/actor_agent2.pt
new file mode 100644
index 0000000000000000000000000000000000000000..98852f5073ddd1e66b12f596defdd5e5872ea4fe
Binary files /dev/null and b/trained_models/7749_threenights/run1/models/actor_agent2.pt differ
diff --git a/trained_models/7749_threenights/run1/models/actor_agent3.pt b/trained_models/7749_threenights/run1/models/actor_agent3.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1c4387647bb2c4ad160878993768d0dec50835cf
Binary files /dev/null and b/trained_models/7749_threenights/run1/models/actor_agent3.pt differ
diff --git a/trained_models/7749_threenights/run1/models/actor_agent4.pt b/trained_models/7749_threenights/run1/models/actor_agent4.pt
new file mode 100644
index 0000000000000000000000000000000000000000..85973254e9dccc84afd6260122a873871e3d4a15
Binary files /dev/null and b/trained_models/7749_threenights/run1/models/actor_agent4.pt differ
diff --git a/trained_models/7749_threenights/run1/models/actor_agent5.pt b/trained_models/7749_threenights/run1/models/actor_agent5.pt
new file mode 100644
index 0000000000000000000000000000000000000000..960d94bdfe440d3ec5a0f3a0f4c9499b26590b69
Binary files /dev/null and b/trained_models/7749_threenights/run1/models/actor_agent5.pt differ
diff --git a/trained_models/7749_threenights/run1/models/actor_agent6.pt b/trained_models/7749_threenights/run1/models/actor_agent6.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bc613ccb830e5b332d2a172639a2fa204758bb51
Binary files /dev/null and b/trained_models/7749_threenights/run1/models/actor_agent6.pt differ
diff --git a/trained_models/7749_threenights/run1/models/critic_agent0.pt b/trained_models/7749_threenights/run1/models/critic_agent0.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3b8453f32fafd8226057b7c3370bbb8f0f206dad
Binary files /dev/null and b/trained_models/7749_threenights/run1/models/critic_agent0.pt differ
diff --git a/trained_models/7749_threenights/run1/models/critic_agent1.pt b/trained_models/7749_threenights/run1/models/critic_agent1.pt
new file mode 100644
index 0000000000000000000000000000000000000000..02959e86a9c55018aeff46f2f524b9967dda64a5
Binary files /dev/null and b/trained_models/7749_threenights/run1/models/critic_agent1.pt differ
diff --git a/trained_models/7749_threenights/run1/models/critic_agent2.pt b/trained_models/7749_threenights/run1/models/critic_agent2.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9a05950db0b2b6511e72d4c0c5ea9dbd9b376e81
Binary files /dev/null and b/trained_models/7749_threenights/run1/models/critic_agent2.pt differ
diff --git a/trained_models/7749_threenights/run1/models/critic_agent3.pt b/trained_models/7749_threenights/run1/models/critic_agent3.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1b6e5603e68009d382a96d5c885625e2f6e234d2
Binary files /dev/null and b/trained_models/7749_threenights/run1/models/critic_agent3.pt differ
diff --git a/trained_models/7749_threenights/run1/models/critic_agent4.pt b/trained_models/7749_threenights/run1/models/critic_agent4.pt
new file mode 100644
index 0000000000000000000000000000000000000000..36bcca950aad892f3cccc8d2f2d82f539d4c8b35
Binary files /dev/null and b/trained_models/7749_threenights/run1/models/critic_agent4.pt differ
diff --git a/trained_models/7749_threenights/run1/models/critic_agent5.pt b/trained_models/7749_threenights/run1/models/critic_agent5.pt
new file mode 100644
index 0000000000000000000000000000000000000000..55829020943844b4b35eb2c7cbcb56ecb5454337
Binary files /dev/null and b/trained_models/7749_threenights/run1/models/critic_agent5.pt differ
diff --git a/trained_models/7749_threenights/run1/models/critic_agent6.pt b/trained_models/7749_threenights/run1/models/critic_agent6.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ae1f5172f9c86daf509df5ccb09ecb65ca89ac85
Binary files /dev/null and b/trained_models/7749_threenights/run1/models/critic_agent6.pt differ