diff --git a/configs/config.py b/configs/config.py index 0b0021d5b4b249b390c4000e926fc3ed7af4b66d..16b2ff802e5be18f8307380ce8dedfde19d8e867 100644 --- a/configs/config.py +++ b/configs/config.py @@ -301,7 +301,7 @@ def get_config(): parser.add_argument("--log_interval", type=int, default=1, help="time duration between contiunous twice log printing.") parser.add_argument("--model_dir", type=str, - default="./trained_models/7749_threenights/run1/models", help="by default None. set the path to pretrained model.") + default="./trained_models/9191_fournights/run1/models", help="by default None. set the path to pretrained model.") # eval parameters parser.add_argument("--use_eval", action='store_true', diff --git a/trained_models/9191_fournights/config.py b/trained_models/9191_fournights/config.py new file mode 100644 index 0000000000000000000000000000000000000000..2d40fcd244a00fa0713f8da2e9b6a5468d7b3559 --- /dev/null +++ b/trained_models/9191_fournights/config.py @@ -0,0 +1,330 @@ +import argparse +from behaviour import behaviour_choices + +def get_config(): + """ + The configuration parser for common hyperparameters of all environment. + Please reach each `scripts/train/<env>_runner.py` file to find private hyperparameters + only used in <env>. + + Prepare parameters: + --algorithm_name <algorithm_name> + specifiy the algorithm, including `["happo", "hatrpo"]` + --experiment_name <str> + an identifier to distinguish different experiment. + --seed <int> + set seed for numpy and torch + --seed_specify + by default True Random or specify seed for numpy/torch + --runing_id <int> + the runing index of experiment (default=1) + --cuda + by default True, will use GPU to train; or else will use CPU; + --cuda_deterministic + by default, make sure random seed effective. if set, bypass such function. + --n_training_threads <int> + number of training threads working in parallel. by default 1 + --n_rollout_threads <int> + number of parallel envs for training rollout. by default 32 + --n_eval_rollout_threads <int> + number of parallel envs for evaluating rollout. by default 1 + --n_render_rollout_threads <int> + number of parallel envs for rendering, could only be set as 1 for some environments. + --num_env_steps <int> + number of env steps to train (default: 10e6) + + + Env parameters: + --env_name <str> + specify the name of environment + --use_obs_instead_of_state + [only for some env] by default False, will use global state; or else will use concatenated local obs. + + Replay Buffer parameters: + --episode_length <int> + the max length of episode in the buffer. + + Network parameters: + --share_policy + by default True, all agents will share the same network; set to make training agents use different policies. + --use_centralized_V + by default True, use centralized training mode; or else will decentralized training mode. + --stacked_frames <int> + Number of input frames which should be stack together. + --hidden_size <int> + Dimension of hidden layers for actor/critic networks + --layer_N <int> + Number of layers for actor/critic networks + --use_ReLU + by default True, will use ReLU. or else will use Tanh. + --use_popart + by default True, use running mean and std to normalize rewards. + --use_feature_normalization + by default True, apply layernorm to normalize inputs. + --use_orthogonal + by default True, use Orthogonal initialization for weights and 0 initialization for biases. or else, will use xavier uniform inilialization. + --gain + by default 0.01, use the gain # of last action layer + --use_naive_recurrent_policy + by default False, use the whole trajectory to calculate hidden states. + --use_recurrent_policy + by default, use Recurrent Policy. If set, do not use. + --recurrent_N <int> + The number of recurrent layers ( default 1). + --data_chunk_length <int> + Time length of chunks used to train a recurrent_policy, default 10. + + Optimizer parameters: + --lr <float> + learning rate parameter, (default: 5e-4, fixed). + --critic_lr <float> + learning rate of critic (default: 5e-4, fixed) + --opti_eps <float> + RMSprop optimizer epsilon (default: 1e-5) + --weight_decay <float> + coefficience of weight decay (default: 0) + + TRPO parameters: + --kl_threshold <float> + the threshold of kl-divergence (default: 0.01) + --ls_step <int> + the step of line search (default: 10) + --accept_ratio <float> + accept ratio of loss improve (default: 0.5) + + PPO parameters: + --ppo_epoch <int> + number of ppo epochs (default: 15) + --use_clipped_value_loss + by default, clip loss value. If set, do not clip loss value. + --clip_param <float> + ppo clip parameter (default: 0.2) + --num_mini_batch <int> + number of batches for ppo (default: 1) + --entropy_coef <float> + entropy term coefficient (default: 0.01) + --use_max_grad_norm + by default, use max norm of gradients. If set, do not use. + --max_grad_norm <float> + max norm of gradients (default: 0.5) + --use_gae + by default, use generalized advantage estimation. If set, do not use gae. + --gamma <float> + discount factor for rewards (default: 0.99) + --gae_lambda <float> + gae lambda parameter (default: 0.95) + --use_proper_time_limits + by default, the return value does consider limits of time. If set, compute returns with considering time limits factor. + --use_huber_loss + by default, use huber loss. If set, do not use huber loss. + --use_value_active_masks + by default True, whether to mask useless data in value loss. + --huber_delta <float> + coefficient of huber loss. + + + Run parameters: + --use_linear_lr_decay + by default, do not apply linear decay to learning rate. If set, use a linear schedule on the learning rate + --save_interval <int> + time duration between contiunous twice models saving. + --log_interval <int> + time duration between contiunous twice log printing. + --model_dir <str> + by default None. set the path to pretrained model. + + Eval parameters: + --use_eval + by default, do not start evaluation. If set`, start evaluation alongside with training. + --eval_interval <int> + time duration between contiunous twice evaluation progress. + --eval_episodes <int> + number of episodes of a single evaluation. + + Render parameters: + --save_gifs + by default, do not save render video. If set, save video. + --use_render + by default, do not render the env during training. If set, start render. Note: something, the environment has internal render process which is not controlled by this hyperparam. + --render_episodes <int> + the number of episodes to render a given env + --ifi <float> + the play interval of each rendered image in saved video. + + Pretrained parameters: + + """ + parser = argparse.ArgumentParser(description='onpolicy_algorithm', formatter_class=argparse.RawDescriptionHelpFormatter) + + # prepare parameters + parser.add_argument("--algorithm_name", type=str, + default='happo', choices=["happo","hatrpo"]) + parser.add_argument("--experiment_name", type=str, + default="check", help="an identifier to distinguish different experiment.") + parser.add_argument("--seed", type=int, + default=1, help="Random seed for numpy/torch") + parser.add_argument("--seed_specify", action="store_true", + default=False, help="Random or specify seed for numpy/torch") + parser.add_argument("--red_behaviour", choices=behaviour_choices, type=str.lower, + default="simpleone", help="name of red behaviour to use for training. Ignored if rand_red_behaviour is True.") + parser.add_argument("--rand_red_behaviour", type=bool, + default=True, help="Randomizes opponent behaviour in each simulation for training. Uses red_behaviour if False.") + parser.add_argument("--rand_size", type=bool, + default=True, help="Randomizes both teams, teamsize in each simulation for training.") + parser.add_argument("--red_size", type=int, + default=7, help="2<=Size of red team<=7, works only with rand_size==false") + parser.add_argument("--green_size", type=int, + default=7, help="2<=Size of green team<=7, works only with rand_size==false") + parser.add_argument("--runing_id", type=int, + default=1, help="the runing index of experiment") + parser.add_argument("--cuda", action='store_false', + default=True, help="by default True, will use GPU to train; or else will use CPU;") + parser.add_argument("--cuda_deterministic", action='store_false', + default=True, help="by default, make sure random seed effective. if set, bypass such function.") + parser.add_argument("--n_training_threads", type=int, + default=20, help="Number of torch threads for training") + parser.add_argument("--n_rollout_threads", type=int, + default=1, help="Number of parallel envs for training rollouts") + parser.add_argument("--n_eval_rollout_threads", type=int, + default=1, help="Number of parallel envs for evaluating rollouts") + parser.add_argument("--n_render_rollout_threads", type=int, + default=1, help="Number of parallel envs for rendering rollouts") + parser.add_argument("--num_env_steps", type=int, + default=60000000, help='Number of environment steps to train (default: 6000)') + parser.add_argument("--user_name", type=str, + default='marl',help="[for wandb usage], to specify user's name for simply collecting training data.") + # env parameters + parser.add_argument("--env_name", type=str, + default='scontrol', help="specify the name of environment") + parser.add_argument("--use_obs_instead_of_state", action='store_true', + default=False, help="Whether to use global state or concatenated obs") + + # replay buffer parameters + parser.add_argument("--episode_length", type=int, + default=600, help="Max length for any episode") + + # network parameters + parser.add_argument("--share_policy", action='store_false', + default=True, help='Whether agent share the same policy') + parser.add_argument("--use_centralized_V", action='store_false', + default=True, help="Whether to use centralized V function") + parser.add_argument("--stacked_frames", type=int, + default=100, help="Dimension of hidden layers for actor/critic networks") + parser.add_argument("--use_stacked_frames", action='store_true', + default=True, help="Whether to use stacked_frames") + parser.add_argument("--hidden_size", type=int, + default=64, help="Dimension of hidden layers for actor/critic networks") + parser.add_argument("--layer_N", type=int, + default=2, help="Number of layers for actor/critic networks") + parser.add_argument("--use_ReLU", action='store_false', + default=True, help="Whether to use ReLU") + parser.add_argument("--use_popart", action='store_false', + default=True, help="by default True, use running mean and std to normalize rewards.") + parser.add_argument("--use_valuenorm", action='store_false', + default=True, help="by default True, use running mean and std to normalize rewards.") + parser.add_argument("--use_feature_normalization", action='store_false', + default=True, help="Whether to apply layernorm to the inputs") + parser.add_argument("--use_orthogonal", action='store_false', + default=True, help="Whether to use Orthogonal initialization for weights and 0 initialization for biases") + parser.add_argument("--gain", type=float, + default=0.01, help="The gain # of last action layer") + + # recurrent parameters + parser.add_argument("--use_naive_recurrent_policy", action='store_true', + default=False, help='Whether to use a naive recurrent policy') + parser.add_argument("--use_recurrent_policy", action='store_true', + default=False, help='use a recurrent policy') + parser.add_argument("--recurrent_N", type=int, + default=1, help="The number of recurrent layers.") + parser.add_argument("--data_chunk_length", type=int, + default=10, help="Time length of chunks used to train a recurrent_policy") + + # optimizer parameters + parser.add_argument("--lr", type=float, + default=1e-4, help='learning rate (default: 5e-4)') + parser.add_argument("--critic_lr", type=float, + default=1e-4, help='critic learning rate (default: 5e-4)') + parser.add_argument("--opti_eps", type=float, + default=1e-5, help='RMSprop optimizer epsilon (default: 1e-5)') + parser.add_argument("--weight_decay", type=float, default=0) + parser.add_argument("--std_x_coef", type=float, default=1) + parser.add_argument("--std_y_coef", type=float, default=0.5) + + + # trpo parameters + parser.add_argument("--kl_threshold", type=float, + default=0.01, help='the threshold of kl-divergence (default: 0.01)') + parser.add_argument("--ls_step", type=int, + default=10, help='number of line search (default: 10)') + parser.add_argument("--accept_ratio", type=float, + default=0.5, help='accept ratio of loss improve (default: 0.5)') + + # ppo parameters + parser.add_argument("--ppo_epoch", type=int, + default=15, help='number of ppo epochs (default: 15)') + parser.add_argument("--use_clipped_value_loss", action='store_false', + default=True, help="by default, clip loss value. If set, do not clip loss value.") + parser.add_argument("--clip_param", type=float, + default=0.2, help='ppo clip parameter (default: 0.2)') + parser.add_argument("--num_mini_batch", type=int, + default=1, help='number of batches for ppo (default: 1)') + parser.add_argument("--entropy_coef", type=float, + default=0.01, help='entropy term coefficient (default: 0.01)') + parser.add_argument("--value_loss_coef", type=float, + default=0.01, help='value loss coefficient (default: 0.5)') + parser.add_argument("--use_max_grad_norm", action='store_false', + default=True, help="by default, use max norm of gradients. If set, do not use.") + parser.add_argument("--max_grad_norm", type=float, + default=10.0, help='max norm of gradients (default: 0.5)') + parser.add_argument("--use_gae", action='store_false', + default=True, help='use generalized advantage estimation') + parser.add_argument("--gamma", type=float, default=0.999999999, + help='discount factor for rewards (default: 0.99)') + parser.add_argument("--gae_lambda", type=float, default=0.95, + help='gae lambda parameter (default: 0.95)') + parser.add_argument("--use_proper_time_limits", action='store_true', + default=False, help='compute returns taking into account time limits') + parser.add_argument("--use_huber_loss", action='store_false', + default=True, help="by default, use huber loss. If set, do not use huber loss.") + parser.add_argument("--use_value_active_masks", action='store_false', + default=True, help="by default True, whether to mask useless data in value loss.") + parser.add_argument("--use_policy_active_masks", action='store_false', + default=True, help="by default True, whether to mask useless data in policy loss.") + parser.add_argument("--huber_delta", type=float, + default=10.0, help=" coefficience of huber loss.") + + # run parameters + parser.add_argument("--use_linear_lr_decay", action='store_true', + default=False, help='use a linear schedule on the learning rate') + parser.add_argument("--save_interval", type=int, + default=1, help="time duration between contiunous twice models saving.") + parser.add_argument("--log_interval", type=int, + default=1, help="time duration between contiunous twice log printing.") + parser.add_argument("--model_dir", type=str, + default="./trained_models/7749_threenights/run1/models", help="by default None. set the path to pretrained model.") + + # eval parameters + parser.add_argument("--use_eval", action='store_true', + default=True, help="by default, do not start evaluation. If set`, start evaluation alongside with training.") + parser.add_argument("--eval_interval", type=int, + default=10, help="time duration between contiunous twice evaluation progress.") + parser.add_argument("--eval_episodes", type=int, + default=2, help="number of episodes of a single evaluation.") + parser.add_argument("--eval_render", type=bool, + default=False, help="visualizes the model every eval. works on top of the 'eval' setting in multiagent_rl_train.") + parser.add_argument("--visualise_delay", type=int, + default=1, help="numer of milliseconds to wait between steps when drawing eval render.") + parser.add_argument("--only_delay_middle", type=bool, + default=True, help="uses 0 delay for time < 150 and time > 350, and visualise delay for the middle") + + # render parameters + parser.add_argument("--save_gifs", action='store_true', + default=False, help="by default, do not save render video. If set, save video.") + parser.add_argument("--use_render", action='store_true', + default=False, help="by default, do not render the env during training. If set, start render. Note: something, the environment has internal render process which is not controlled by this hyperparam.") + parser.add_argument("--render_episodes", type=int, + default=5, help="the number of episodes to render a given env") + parser.add_argument("--ifi", type=float, + default=0.1, help="the play interval of each rendered image in saved video.") + + return parser \ No newline at end of file diff --git a/trained_models/9191_fournights/reward b/trained_models/9191_fournights/reward new file mode 100644 index 0000000000000000000000000000000000000000..de5eaace25ee018032d05f719a55edb6c529277b --- /dev/null +++ b/trained_models/9191_fournights/reward @@ -0,0 +1,10 @@ +agent_id: + reward[agent_id]['share_radar_actions'] + reward[agent_id]['detected_with_no_radar'] + + reward[agent_id]['detected_multiple'] + + (0.01*reward[agent_id]['position_error'] - 0.01*reward[agent_id]['enemy_position_error']) +eval: agent_id: (0.01*reward[agent_id]['position_error'] - 0.01*reward[agent_id]['enemy_position_error']) + + (0.01*reward[agent_id]['position_delay_error'] - 0.01*reward[agent_id]['enemy_position_delay_error']) + + (reward[agent_id]['aircraft_share_detected']-reward[agent_id]['enemy_aircraft_share_detected']) + + (reward[agent_id]['aircraft_share_delay_detected']-reward[agent_id]['enemy_aircraft_share_delay_detected']) + + (0.1*reward[agent_id]['radar_share_detected']-0.1*reward[agent_id]['enemy_radar_share_detected']) + + (0.1*reward[agent_id]['radar_share_delay_detected']-0.1*reward[agent_id]['enemy_radar_share_delay_detected']) + + (reward[agent_id]['share_radar_actions'] - reward[agent_id]['enemy_share_radar_actions']) diff --git a/trained_models/9191_fournights/run1/logs/agent0/actor_grad_norm/agent0/actor_grad_norm/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent0/actor_grad_norm/agent0/actor_grad_norm/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..f182b9800671a6af14ac9641a1736d27d880d149 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent0/actor_grad_norm/agent0/actor_grad_norm/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent0/average_step_adversarial_rewards/agent0/average_step_adversarial_rewards/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent0/average_step_adversarial_rewards/agent0/average_step_adversarial_rewards/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..8308e715defcc6fe9248a16a9ae7b111732cb653 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent0/average_step_adversarial_rewards/agent0/average_step_adversarial_rewards/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent0/average_step_rewards/agent0/average_step_rewards/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent0/average_step_rewards/agent0/average_step_rewards/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..4984f068655e5003dd6c0c42056ac10ebe23dd39 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent0/average_step_rewards/agent0/average_step_rewards/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent0/critic_grad_norm/agent0/critic_grad_norm/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent0/critic_grad_norm/agent0/critic_grad_norm/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..1b8cfdc7c5b088a778e920063bfa726ac5aac21c Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent0/critic_grad_norm/agent0/critic_grad_norm/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent0/dist_entropy/agent0/dist_entropy/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent0/dist_entropy/agent0/dist_entropy/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..285510669932610dcba85ebc5cc8e9223ea0d223 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent0/dist_entropy/agent0/dist_entropy/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent0/policy_loss/agent0/policy_loss/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent0/policy_loss/agent0/policy_loss/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..e9efb1c6057d8c24b25ab0534ffd9a25196eeed2 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent0/policy_loss/agent0/policy_loss/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent0/ratio/agent0/ratio/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent0/ratio/agent0/ratio/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..4c8ccb0f052f3a89f46bbe5c9f6f19a4eb18ae77 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent0/ratio/agent0/ratio/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent0/value_loss/agent0/value_loss/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent0/value_loss/agent0/value_loss/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..3f4a528430c21958963bd647f4b91b0bcc713398 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent0/value_loss/agent0/value_loss/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent1/actor_grad_norm/agent1/actor_grad_norm/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent1/actor_grad_norm/agent1/actor_grad_norm/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..4986f8a31e5f5b56a229101b3a87fcd6fa2ce20f Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent1/actor_grad_norm/agent1/actor_grad_norm/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent1/average_step_adversarial_rewards/agent1/average_step_adversarial_rewards/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent1/average_step_adversarial_rewards/agent1/average_step_adversarial_rewards/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..8ea299d81cd36ea43597bee36d7431e21907ce43 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent1/average_step_adversarial_rewards/agent1/average_step_adversarial_rewards/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent1/average_step_rewards/agent1/average_step_rewards/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent1/average_step_rewards/agent1/average_step_rewards/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..b5c1a03f908a680f139e6017d5706af31de0eab6 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent1/average_step_rewards/agent1/average_step_rewards/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent1/critic_grad_norm/agent1/critic_grad_norm/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent1/critic_grad_norm/agent1/critic_grad_norm/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..0f61431aacc523aae8810bd883ff89c4992518d7 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent1/critic_grad_norm/agent1/critic_grad_norm/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent1/dist_entropy/agent1/dist_entropy/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent1/dist_entropy/agent1/dist_entropy/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..00a09d7e7d10715d2e88e12ab7f71e22d5796546 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent1/dist_entropy/agent1/dist_entropy/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent1/policy_loss/agent1/policy_loss/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent1/policy_loss/agent1/policy_loss/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..d8ed8324c620a51315d85699a595e3e2176b2a4f Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent1/policy_loss/agent1/policy_loss/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent1/ratio/agent1/ratio/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent1/ratio/agent1/ratio/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..a1088b55846d5c6aee14b5a23991e65beed2da46 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent1/ratio/agent1/ratio/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent1/value_loss/agent1/value_loss/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent1/value_loss/agent1/value_loss/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..2a802386f21de51dddef7d35f8605c91ac444eb9 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent1/value_loss/agent1/value_loss/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent2/actor_grad_norm/agent2/actor_grad_norm/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent2/actor_grad_norm/agent2/actor_grad_norm/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..75b08ebeabb1a48b0b1df2d02d23499ff9bb52d0 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent2/actor_grad_norm/agent2/actor_grad_norm/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent2/average_step_adversarial_rewards/agent2/average_step_adversarial_rewards/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent2/average_step_adversarial_rewards/agent2/average_step_adversarial_rewards/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..073bbd6311d0c447775a673689551a1b3a2c8959 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent2/average_step_adversarial_rewards/agent2/average_step_adversarial_rewards/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent2/average_step_rewards/agent2/average_step_rewards/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent2/average_step_rewards/agent2/average_step_rewards/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..36478ab39f048a4905b5b5badcaf7addb0fe02e7 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent2/average_step_rewards/agent2/average_step_rewards/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent2/critic_grad_norm/agent2/critic_grad_norm/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent2/critic_grad_norm/agent2/critic_grad_norm/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..6a9da58b85aefae77928b32f3e353e4625c123fa Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent2/critic_grad_norm/agent2/critic_grad_norm/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent2/dist_entropy/agent2/dist_entropy/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent2/dist_entropy/agent2/dist_entropy/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..02ed869294e1329d3574784d85a10e7e389ed091 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent2/dist_entropy/agent2/dist_entropy/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent2/policy_loss/agent2/policy_loss/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent2/policy_loss/agent2/policy_loss/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..cb6e9ee4725eda6e2645598b68b9c41b6687e6d1 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent2/policy_loss/agent2/policy_loss/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent2/ratio/agent2/ratio/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent2/ratio/agent2/ratio/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..fd0a76b0efbf25f1de3610be17c4ce49d41bee73 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent2/ratio/agent2/ratio/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent2/value_loss/agent2/value_loss/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent2/value_loss/agent2/value_loss/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..1c40c48451574d987e6292f083b6ad34631068e1 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent2/value_loss/agent2/value_loss/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent3/actor_grad_norm/agent3/actor_grad_norm/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent3/actor_grad_norm/agent3/actor_grad_norm/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..9481b488861b8141162b314a0e8d5eee6c1a30f3 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent3/actor_grad_norm/agent3/actor_grad_norm/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent3/average_step_adversarial_rewards/agent3/average_step_adversarial_rewards/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent3/average_step_adversarial_rewards/agent3/average_step_adversarial_rewards/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..8ff364a254f6034030814016b752946787a5cb10 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent3/average_step_adversarial_rewards/agent3/average_step_adversarial_rewards/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent3/average_step_rewards/agent3/average_step_rewards/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent3/average_step_rewards/agent3/average_step_rewards/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..76486bb2d2e1ead3bfad640975e362605b8562f8 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent3/average_step_rewards/agent3/average_step_rewards/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent3/critic_grad_norm/agent3/critic_grad_norm/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent3/critic_grad_norm/agent3/critic_grad_norm/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..592fd9831c3a920d20b96e511ba1dc6eab2c5aa5 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent3/critic_grad_norm/agent3/critic_grad_norm/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent3/dist_entropy/agent3/dist_entropy/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent3/dist_entropy/agent3/dist_entropy/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..2a13ecb81df9474995382ed709da9213dc5e277a Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent3/dist_entropy/agent3/dist_entropy/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent3/policy_loss/agent3/policy_loss/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent3/policy_loss/agent3/policy_loss/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..105c3588f2bafab8bea9ffd19d4d004be9a44323 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent3/policy_loss/agent3/policy_loss/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent3/ratio/agent3/ratio/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent3/ratio/agent3/ratio/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..e43d3952337ad355c7033de9aaff982105d7933f Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent3/ratio/agent3/ratio/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent3/value_loss/agent3/value_loss/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent3/value_loss/agent3/value_loss/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..eb3388dd6133077023486612d4e415136ed2df5d Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent3/value_loss/agent3/value_loss/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent4/actor_grad_norm/agent4/actor_grad_norm/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent4/actor_grad_norm/agent4/actor_grad_norm/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..ff4a658e64bd9092905314769e824f416e260461 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent4/actor_grad_norm/agent4/actor_grad_norm/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent4/average_step_adversarial_rewards/agent4/average_step_adversarial_rewards/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent4/average_step_adversarial_rewards/agent4/average_step_adversarial_rewards/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..af3aacd6c7a185e330c7b1e723b511c6ab89a4b6 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent4/average_step_adversarial_rewards/agent4/average_step_adversarial_rewards/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent4/average_step_rewards/agent4/average_step_rewards/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent4/average_step_rewards/agent4/average_step_rewards/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..a3552c00a642bff67950a8837615a239a2453432 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent4/average_step_rewards/agent4/average_step_rewards/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent4/critic_grad_norm/agent4/critic_grad_norm/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent4/critic_grad_norm/agent4/critic_grad_norm/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..beb9fc5c73ec5b7a0f102b019ed9e2ab9e76f7fa Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent4/critic_grad_norm/agent4/critic_grad_norm/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent4/dist_entropy/agent4/dist_entropy/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent4/dist_entropy/agent4/dist_entropy/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..5fedbc4bc44b8d179cac072b1f3ae21be591ae87 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent4/dist_entropy/agent4/dist_entropy/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent4/policy_loss/agent4/policy_loss/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent4/policy_loss/agent4/policy_loss/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..a30d01f82fed2a7fdd8aa6a0d673cd8c0a053e86 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent4/policy_loss/agent4/policy_loss/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent4/ratio/agent4/ratio/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent4/ratio/agent4/ratio/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..3032cfe3eafe03dabccdf5232f55a510f3a2a77f Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent4/ratio/agent4/ratio/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent4/value_loss/agent4/value_loss/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent4/value_loss/agent4/value_loss/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..6e2f731f2b59f579d92ad33eb4e7ddcf737194c6 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent4/value_loss/agent4/value_loss/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent5/actor_grad_norm/agent5/actor_grad_norm/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent5/actor_grad_norm/agent5/actor_grad_norm/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..61afc077cd4a684adcca6190d83a3f9793c86eb4 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent5/actor_grad_norm/agent5/actor_grad_norm/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent5/average_step_adversarial_rewards/agent5/average_step_adversarial_rewards/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent5/average_step_adversarial_rewards/agent5/average_step_adversarial_rewards/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..d8877007b7c770e35228ed8cefada6013c3a7d37 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent5/average_step_adversarial_rewards/agent5/average_step_adversarial_rewards/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent5/average_step_rewards/agent5/average_step_rewards/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent5/average_step_rewards/agent5/average_step_rewards/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..efec4cb41d8fe512c2f606186268263c08499976 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent5/average_step_rewards/agent5/average_step_rewards/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent5/critic_grad_norm/agent5/critic_grad_norm/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent5/critic_grad_norm/agent5/critic_grad_norm/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..586caaac818390f16a04e22f31e2d935a61d3b05 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent5/critic_grad_norm/agent5/critic_grad_norm/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent5/dist_entropy/agent5/dist_entropy/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent5/dist_entropy/agent5/dist_entropy/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..7f2088816448d58c548d6acc2f6a07949f00df82 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent5/dist_entropy/agent5/dist_entropy/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent5/policy_loss/agent5/policy_loss/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent5/policy_loss/agent5/policy_loss/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..de4a4d2c5a14c25185a1df38c5a8cf6632003057 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent5/policy_loss/agent5/policy_loss/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent5/ratio/agent5/ratio/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent5/ratio/agent5/ratio/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..efb67ce319e1469288c104e762dc924edffe008a Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent5/ratio/agent5/ratio/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent5/value_loss/agent5/value_loss/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent5/value_loss/agent5/value_loss/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..a9eb46c86004a90f88cf03cbd98d313cc0aca156 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent5/value_loss/agent5/value_loss/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent6/actor_grad_norm/agent6/actor_grad_norm/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent6/actor_grad_norm/agent6/actor_grad_norm/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..54a76d1a7a9268b5b32b35cfd9874242f5e465d3 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent6/actor_grad_norm/agent6/actor_grad_norm/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent6/average_step_adversarial_rewards/agent6/average_step_adversarial_rewards/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent6/average_step_adversarial_rewards/agent6/average_step_adversarial_rewards/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..56e9a9e0f3b66db48f82a045ec2cb0d9d078ea96 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent6/average_step_adversarial_rewards/agent6/average_step_adversarial_rewards/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent6/average_step_rewards/agent6/average_step_rewards/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent6/average_step_rewards/agent6/average_step_rewards/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..b72876fe3b67578e309014a673e07c534f73d776 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent6/average_step_rewards/agent6/average_step_rewards/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent6/critic_grad_norm/agent6/critic_grad_norm/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent6/critic_grad_norm/agent6/critic_grad_norm/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..36aad933e2ffd5ba32ff0c5df77c887b77930b97 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent6/critic_grad_norm/agent6/critic_grad_norm/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent6/dist_entropy/agent6/dist_entropy/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent6/dist_entropy/agent6/dist_entropy/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..3a2eaa9432be6bd8afe3c135d644d72699c152d5 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent6/dist_entropy/agent6/dist_entropy/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent6/policy_loss/agent6/policy_loss/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent6/policy_loss/agent6/policy_loss/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..2b87dc06ba4883b866dcca8113a3f5c20b521dc3 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent6/policy_loss/agent6/policy_loss/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent6/ratio/agent6/ratio/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent6/ratio/agent6/ratio/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..bbdef595e14ed2577e12fb6663e9bafd28ef2bbf Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent6/ratio/agent6/ratio/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/agent6/value_loss/agent6/value_loss/events.out.tfevents.1670950489.maiex1-104 b/trained_models/9191_fournights/run1/logs/agent6/value_loss/agent6/value_loss/events.out.tfevents.1670950489.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..657eb1df0dfdf34367e1fb18eb18b85b6e680800 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/agent6/value_loss/agent6/value_loss/events.out.tfevents.1670950489.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/eval_average_episode_rewards/eval_average_episode_rewards/events.out.tfevents.1670951582.maiex1-104 b/trained_models/9191_fournights/run1/logs/eval_average_episode_rewards/eval_average_episode_rewards/events.out.tfevents.1670951582.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..a213692cae75a40d95dd2cf0a6380868878b06e0 Binary files /dev/null and b/trained_models/9191_fournights/run1/logs/eval_average_episode_rewards/eval_average_episode_rewards/events.out.tfevents.1670951582.maiex1-104 differ diff --git a/trained_models/9191_fournights/run1/logs/events.out.tfevents.1670950364.maiex1-104 b/trained_models/9191_fournights/run1/logs/events.out.tfevents.1670950364.maiex1-104 new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/trained_models/9191_fournights/run1/models/actor_agent0.pt b/trained_models/9191_fournights/run1/models/actor_agent0.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b42cd00a4ef535912373d43a02fea2d00ae3ba6 Binary files /dev/null and b/trained_models/9191_fournights/run1/models/actor_agent0.pt differ diff --git a/trained_models/9191_fournights/run1/models/actor_agent1.pt b/trained_models/9191_fournights/run1/models/actor_agent1.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0b7df31b604174c8a2f17d149eaee65ac6cae84 Binary files /dev/null and b/trained_models/9191_fournights/run1/models/actor_agent1.pt differ diff --git a/trained_models/9191_fournights/run1/models/actor_agent2.pt b/trained_models/9191_fournights/run1/models/actor_agent2.pt new file mode 100644 index 0000000000000000000000000000000000000000..6416145f54fa8d89292236a32c849cf4d9a437e0 Binary files /dev/null and b/trained_models/9191_fournights/run1/models/actor_agent2.pt differ diff --git a/trained_models/9191_fournights/run1/models/actor_agent3.pt b/trained_models/9191_fournights/run1/models/actor_agent3.pt new file mode 100644 index 0000000000000000000000000000000000000000..4fcefafe4c801dbf4c82fb919f8662d76211d0e5 Binary files /dev/null and b/trained_models/9191_fournights/run1/models/actor_agent3.pt differ diff --git a/trained_models/9191_fournights/run1/models/actor_agent4.pt b/trained_models/9191_fournights/run1/models/actor_agent4.pt new file mode 100644 index 0000000000000000000000000000000000000000..baa8d53e803306fbb651f8f53f529e209b11df36 Binary files /dev/null and b/trained_models/9191_fournights/run1/models/actor_agent4.pt differ diff --git a/trained_models/9191_fournights/run1/models/actor_agent5.pt b/trained_models/9191_fournights/run1/models/actor_agent5.pt new file mode 100644 index 0000000000000000000000000000000000000000..d7ac11d20a30f414e6ac2dfec763604487e83644 Binary files /dev/null and b/trained_models/9191_fournights/run1/models/actor_agent5.pt differ diff --git a/trained_models/9191_fournights/run1/models/actor_agent6.pt b/trained_models/9191_fournights/run1/models/actor_agent6.pt new file mode 100644 index 0000000000000000000000000000000000000000..1ef58eb1f7265473ddd9ac3e9a865955b56fcd59 Binary files /dev/null and b/trained_models/9191_fournights/run1/models/actor_agent6.pt differ diff --git a/trained_models/9191_fournights/run1/models/critic_agent0.pt b/trained_models/9191_fournights/run1/models/critic_agent0.pt new file mode 100644 index 0000000000000000000000000000000000000000..86f1aae0a32456c59087457d363f9a4f3cba97bd Binary files /dev/null and b/trained_models/9191_fournights/run1/models/critic_agent0.pt differ diff --git a/trained_models/9191_fournights/run1/models/critic_agent1.pt b/trained_models/9191_fournights/run1/models/critic_agent1.pt new file mode 100644 index 0000000000000000000000000000000000000000..882aa316e8b141e5865516c0fceac00233602c03 Binary files /dev/null and b/trained_models/9191_fournights/run1/models/critic_agent1.pt differ diff --git a/trained_models/9191_fournights/run1/models/critic_agent2.pt b/trained_models/9191_fournights/run1/models/critic_agent2.pt new file mode 100644 index 0000000000000000000000000000000000000000..66380ca1e225c715ef207159b4d7ee35d9f0fc0b Binary files /dev/null and b/trained_models/9191_fournights/run1/models/critic_agent2.pt differ diff --git a/trained_models/9191_fournights/run1/models/critic_agent3.pt b/trained_models/9191_fournights/run1/models/critic_agent3.pt new file mode 100644 index 0000000000000000000000000000000000000000..39e2e7b6639122a17f123ed34325a62922dae2e2 Binary files /dev/null and b/trained_models/9191_fournights/run1/models/critic_agent3.pt differ diff --git a/trained_models/9191_fournights/run1/models/critic_agent4.pt b/trained_models/9191_fournights/run1/models/critic_agent4.pt new file mode 100644 index 0000000000000000000000000000000000000000..feba6db3048c53b90d1ad0ac66387774f87bbec7 Binary files /dev/null and b/trained_models/9191_fournights/run1/models/critic_agent4.pt differ diff --git a/trained_models/9191_fournights/run1/models/critic_agent5.pt b/trained_models/9191_fournights/run1/models/critic_agent5.pt new file mode 100644 index 0000000000000000000000000000000000000000..bd0c23c2553b6d5d0ccd036369a5e57bc3d7d33a Binary files /dev/null and b/trained_models/9191_fournights/run1/models/critic_agent5.pt differ diff --git a/trained_models/9191_fournights/run1/models/critic_agent6.pt b/trained_models/9191_fournights/run1/models/critic_agent6.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb7551a659ffa7870636a371ce8276ef054e2454 Binary files /dev/null and b/trained_models/9191_fournights/run1/models/critic_agent6.pt differ