Emil Karlsson / sensor-control · Commits · 6a5cfb13

Commit 6a5cfb13 authored 2 years ago by dennismalmgren

Updated to run eval
Parent 9258ccd2

Showing 2 changed files with 410 additions and 2 deletions:

  configs/config.py        +2    −2
  multiagent_rl_eval.py    +408  −0

configs/config.py  +2 −2

@@ -290,7 +290,7 @@ def get_config():
     parser.add_argument("--log_interval", type=int,
                         default=1, help="time duration between contiunous twice log printing.")
     parser.add_argument("--model_dir", type=str,
-                        default=None, help="by default None. set the path to pretrained model.")
+                        default="/mnt/f/repos/results/scontrol/happo/check/9967/run1/models", help="by default None. set the path to pretrained model.")
 
     # eval parameters
     parser.add_argument("--use_eval", action='store_true',
@@ -298,7 +298,7 @@ def get_config():
     parser.add_argument("--eval_interval", type=int,
                         default=10, help="time duration between contiunous twice evaluation progress.")
     parser.add_argument("--eval_episodes", type=int,
-                        default=1, help="number of episodes of a single evaluation.")
+                        default=2, help="number of episodes of a single evaluation.")
 
     # render parameters
     parser.add_argument("--save_gifs", action='store_true',
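
Both hunks only move argparse defaults; a value given on the command line still takes precedence over the new hard-coded ones. A minimal, stand-alone sketch of that behaviour (plain argparse, not the project's get_config()):

# Stand-alone illustration of how the changed defaults behave; not code from this repository.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--model_dir", type=str,
                    default="/mnt/f/repos/results/scontrol/happo/check/9967/run1/models")
parser.add_argument("--eval_episodes", type=int, default=2)

# With no flags, the defaults introduced in this commit are used.
print(parser.parse_known_args([])[0])
# Explicit flags override them, e.g. to point at a different checkpoint directory.
print(parser.parse_known_args(["--model_dir", "/tmp/models", "--eval_episodes", "5"])[0])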

multiagent_rl_eval.py  0 → 100644  +408 −0

import argparse
import datetime
import logging
import sys
import os
sys.path.append("../")
from pathlib import Path
import random
import time
import os
from configs.config import get_config
import gym
from gym.wrappers.time_limit import TimeLimit
from gym.wrappers.flatten_observation import FlattenObservation
import gym.spaces as spaces
import jax
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from observation_normalization_wrapper import ObservationNormalizationWrapper
from torch.utils.tensorboard import SummaryWriter
import setproctitle
from configs.config import get_config
from runners.separated.sensor_runner import SensorRunner as Runner
from envs.env_wrappers import ShareDummyVecEnv, ShareSubprocVecEnv
from sensor_control_env import SensorControlEnv
from discretized_action_wrapper import DiscretizedActionWrapper
from flatten_observation_wrapper import FlattenObservationWrapper
from red_team_behaviour_wrapper import RedTeamBehaviourWrapper
from scalarized_reward_wrapper import ScalarizedRewardWrapper
from multiagent_wrapper import MultiagentWrapper
from behaviour import behaviour_choices
import constants


def make_train_env(all_args):
    def get_env_fn(rank):
        def init_env():
            if all_args.env_name == "scontrol":
                max_team_members = 7
                env_config = {
                    "max_team_size": max_team_members,
                    "scenario_id": 0,
                    'visualise_delay': 0,
                }
                # Weighting of objectives
                reward_wrapper_config = {
                    "share_detected": 1.0,
                    "share_delay_detected": 1.0,
                    "share_radar_actions": 0.0,
                    "position_error": 0.0,
                    "position_delay_error": 0.0
                }
                red_behaviour_name = all_args.red
                env = SensorControlEnv(env_config)
                env = ScalarizedRewardWrapper(env, reward_wrapper_config)
                env = ObservationNormalizationWrapper(env)
                env = FlattenObservationWrapper(env)
                env = RedTeamBehaviourWrapper(env, red_behaviour_name)
                env = DiscretizedActionWrapper(env)
                env = TimeLimit(env, max_episode_steps=constants.SCENARIO_LENGTH)
                env = MultiagentWrapper(env)
            else:
                print("Can not support the " + all_args.env_name + " environment.")
                raise NotImplementedError
            return env
        return init_env

    if all_args.n_rollout_threads == 1:
        return ShareDummyVecEnv([get_env_fn(0)])
    else:
        return ShareSubprocVecEnv([get_env_fn(i) for i in range(all_args.n_rollout_threads)])


def make_eval_env(all_args):
    def get_env_fn(rank):
        def init_env():
            if all_args.env_name == "scontrol":
                max_team_members = 7
                env_config = {
                    "max_team_size": max_team_members,
                    "scenario_id": 0,
                    'visualise_delay': 0,
                }
                reward_wrapper_config = {
                    "share_detected": 1.0,
                    "share_delay_detected": 1.0,
                    "share_radar_actions": 0.0,
                    "position_error": 0.0,
                    "position_delay_error": 0.0
                }
                red_behaviour_name = all_args.red
                env = SensorControlEnv(env_config)
                env = ScalarizedRewardWrapper(env, reward_wrapper_config)
                env = ObservationNormalizationWrapper(env)
                env = FlattenObservationWrapper(env)
                env = RedTeamBehaviourWrapper(env, red_behaviour_name)
                env = DiscretizedActionWrapper(env)
                env = TimeLimit(env, max_episode_steps=constants.SCENARIO_LENGTH)
                env = MultiagentWrapper(env)
            else:
                print("Can not support the " + all_args.env_name + " environment.")
                raise NotImplementedError
            return env
        return init_env

    if all_args.n_eval_rollout_threads == 1:
        return ShareDummyVecEnv([get_env_fn(0)])
    else:
        return ShareSubprocVecEnv([get_env_fn(i) for i in range(all_args.n_eval_rollout_threads)])


def parse_args(args, parser):
    parser.add_argument("--red", default="no", choices=behaviour_choices, type=str.lower)
    parser.add_argument("--use_single_network", action='store_true', default=False)

    all_args = parser.parse_known_args(args)[0]

    return all_args


def main(args) -> None:
    # Create argument parse
    parser = get_config()
    all_args = parse_args(args, parser)
    print("all config: ", all_args)

    if all_args.seed_specify:
        all_args.seed = all_args.runing_id  # I really dislike misspelling but I don't want to change the original code.
    else:
        all_args.seed = np.random.randint(1000, 10000)
    print("seed is :", all_args.seed)

    if all_args.cuda and torch.cuda.is_available():
        print("choose to use gpu...")
        device = torch.device("cuda:0")
        torch.set_num_threads(all_args.n_training_threads)
        if all_args.cuda_deterministic:
            torch.backends.cudnn.benchmark = False
            torch.backends.cudnn.deterministic = True
    else:
        print("choose to use cpu...")
        device = torch.device("cpu")
        torch.set_num_threads(all_args.n_training_threads)

    run_dir = Path(os.path.split(os.path.dirname(os.path.abspath(__file__)))[0] + "/results") \
        / all_args.env_name / all_args.algorithm_name / all_args.experiment_name / str(all_args.seed)
    if not run_dir.exists():
        os.makedirs(str(run_dir))

    if not run_dir.exists():
        curr_run = 'run1'
    else:
        exst_run_nums = [int(str(folder.name).split('run')[1]) for folder in run_dir.iterdir()
                         if str(folder.name).startswith('run')]
        if len(exst_run_nums) == 0:
            curr_run = 'run1'
        else:
            curr_run = 'run%i' % (max(exst_run_nums) + 1)
    run_dir = run_dir / curr_run
    if not run_dir.exists():
        os.makedirs(str(run_dir))

    setproctitle.setproctitle(str(all_args.algorithm_name) + "-" + str(all_args.env_name) + "-"
                              + str(all_args.experiment_name) + "@" + str(all_args.user_name))

    # seed
    torch.manual_seed(all_args.seed)
    torch.cuda.manual_seed_all(all_args.seed)
    np.random.seed(all_args.seed)

    envs = make_train_env(all_args)
    eval_envs = make_eval_env(all_args) if all_args.use_eval else None
    num_agents = envs.n_agents

    config = {
        "all_args": all_args,
        "envs": envs,
        "eval_envs": eval_envs,
        "num_agents": num_agents,
        "device": device,
        "run_dir": run_dir,
    }

    runner = Runner(config)  # uses config.py/model_dir to load parameters into the model
    runner.eval(total_num_steps=1)  # uses config.py/eval_episodes to identify how many episodes of eval to run.

    # post process
    envs.close()
    if all_args.use_eval and eval_envs is not envs:
        eval_envs.close()

    runner.writter.export_scalars_to_json(str(runner.log_dir + '/summary.json'))
    runner.writter.close()


if __name__ == '__main__':
    main(sys.argv[1:])


from rl_behaviour import rl_behaviour_choices
from behaviour import behaviour_choices
import constants
from scenario import eval_to_csv
from scenario_io import write_scenario_to_file
from single_agent_wrapper import SingleAgentWrapper
from single_agent_agent import Agent
from runners.separated.base_runner import Runner


def prepare_cumulative_evaluation(env: gym.Env):
    base_env = env.unwrapped
    reward = base_env.reward_space.sample()
    zero_reward = jax.tree_util.tree_map(lambda e: 0.0, reward)
    return zero_reward


def prepare_cumulative_scalarized_evaluation_dict(env: gym.Env):
    reward = env.reward_space.sample()
    zero_reward = jax.tree_util.tree_map(lambda e: 0.0, reward)
    return zero_reward


def train_on_scenario(red_behaviour_name: str, config: dict, logger: logging.Logger, rng: np.random.Generator):
    max_team_members = 7
    scenario_id = rng.integers(0, 1000000)
    env_config = {
        "scenario_id": scenario_id,  # ignore this for training.
        "use_static": False,  # no_greens have to be 4 and no_reds 2 if use_static is set to True.
        "no_greens": 7,  # rng.integers(2, max_team_members),
        "no_reds": 7,  # rng.integers(2, max_team_members),
        'visualise_delay': 0,
    }
    reward_wrapper_config = {
        "share_detected": 1.0,
        "share_delay_detected": 1.0,
        "share_radar_actions": 0.0,
        "position_error": 0.0,
        "position_delay_error": 0.0
    }
    logger.info("Generate scenario")
    env = SensorControlEnv(env_config)
    env = ScalarizedRewardWrapper(env, reward_wrapper_config)
    env = FlattenObservationWrapper(env, max_sa_size=max_team_members)
    env = RedTeamBehaviourWrapper(env, red_behaviour_name)
    # This one is incredibly slow.
    env = ObservationNormalizationWrapper(env, max_sa_size=max_team_members)
    env = DiscretizedActionWrapper(env)
    # env = SingleAgentWrapper(env, max_team_members=max_team_members, max_sa_size=max_team_members)
    env = FlattenObservation(env)
    env = TimeLimit(env, max_episode_steps=constants.SCENARIO_LENGTH)

    run_name = f"run_{int(time.time())}"
    config["env"] = env
    config['num_agents'] = max_team_members
    runner = Runner(config)
    runner.run()

# #Start training
# agent = Agent(env)
# optimizer = optim.Adam(agent.parameters(), lr=learning_rate, eps=1e-5)
# if load_checkpoint_path:
# agent.load_state_dict(torch.load(os.path.join(load_checkpoint_path, "agent.pt")))
# optimizer.load_state_dict(torch.load(os.path.join(load_checkpoint_path, "optimizer.pt")))
# num_steps = constants.SCENARIO_LENGTH #Number of steps in a rollout
# batch_size = num_steps
# num_minibatches = 4
# minibatch_size = batch_size // num_minibatches
# # Storage setup
# obs = torch.zeros((num_steps,) + env.observation_space.shape)
# actions = torch.zeros((num_steps,) + env.action_space.shape)
# logprobs = torch.zeros((num_steps,))
# rewards = torch.zeros((num_steps,))
# terminateds = torch.zeros((num_steps,))
# truncateds = torch.zeros((num_steps,))
# values = torch.zeros((num_steps,))
# num_updates = total_timesteps // batch_size
# # cumulative_evaluation_dict = prepare_cumulative_evaluation(env)
# # cumulative_scalarized_evaluation_dict = prepare_cumulative_scalarized_evaluation_dict(env)
# logger.info("Start training")
# run_name = f"run_{int(time.time())}"
# writer = SummaryWriter(f"runs/{run_name}")
# start = time.time()
# global_step = 0
# for update in range(1, num_updates + 1):
# next_obs, info = env.reset(seed=rng.integers(2**32))
# next_obs = torch.tensor(next_obs)
# next_terminated = torch.zeros(1, dtype=torch.float)
# next_truncated = torch.zeros(1, dtype=torch.float)
# for step in range(0, num_steps):
# global_step += 1
# obs[step] = next_obs
# terminateds[step] = next_terminated
# truncateds[step] = next_truncated
# with torch.no_grad():
# action, logprob, _, value = agent.get_action_and_value(next_obs)
# values[step] = value.flatten()
# actions[step] = action
# logprobs[step] = logprob
# next_obs, reward, terminated, truncated, info = env.step(action)
# rewards[step] = torch.tensor(reward).view(-1)
# next_obs, next_terminated, next_truncated = torch.tensor(next_obs), torch.tensor(float(terminated)), torch.tensor(float(truncated))
# writer.add_scalar("charts/episodic_return", sum(rewards), global_step)
# logger.info(f"global step: {global_step}, episodic return: {sum(rewards)}")
# with torch.no_grad():
# next_value = agent.get_value(next_obs).reshape(1, -1)
# advantages = torch.zeros_like(rewards)
# lastgaelam = 0
# for t in reversed(range(num_steps)):
# if t == num_steps - 1:
# nextnonterminal = 1.0 - next_terminated
# nextnontruncated = 1.0 - next_truncated
# nextvalues = next_value
# else:
# nextnonterminal = 1.0 - terminateds[t + 1]
# nextnontruncated = 1.0 - truncateds[t + 1]
# nextvalues = values[t + 1]
# delta = rewards[t] + gamma * nextvalues * nextnonterminal - values[t]
# advantages[t] = lastgaelam = delta + gamma * 0.95 * nextnontruncated * lastgaelam
# returns = advantages + values
# # flattening
# b_obs = obs.reshape((-1,) + env.observation_space.shape)
# b_logprobs = logprobs.reshape(-1)
# b_actions = actions.reshape((-1,) + env.action_space.shape)
# b_advantages = advantages.reshape(-1)
# b_returns = returns.reshape(-1)
# b_values = values.reshape(-1)
# b_inds = np.arange(batch_size)
# clipfracs = []
# for epoch in range(update_epochs):
# np.random.shuffle(b_inds)
# for start in range(0, batch_size, minibatch_size):
# end = start + minibatch_size
# mbinds = b_inds[start:end]
# _, newlogprob, entropy, newvalue = agent.get_action_and_value(b_obs[mbinds], b_actions.long()[mbinds])
# logratio = newlogprob - b_logprobs[mbinds]
# ratio = logratio.exp()
# mb_advantages = b_advantages[mbinds]
# mb_advantages = (mb_advantages - mb_advantages.mean()) /(mb_advantages.std() + 1e-8)
# # Policy loss
# pg_loss1 = -mb_advantages * ratio
# pg_loss2 = -mb_advantages * torch.clamp(ratio, 1 - 0.2, 1 + 0.2)
# pg_loss = torch.max(pg_loss1, pg_loss2).mean()
# # Value loss
# v_loss = 0.5 * ((newvalue - b_returns[mbinds]) ** 2).mean()
# entropy_loss = entropy.mean()
# loss = pg_loss - 0.01 * entropy_loss + vf_coef * v_loss
# # Do the update
# optimizer.zero_grad()
# loss.backward()
# nn.utils.clip_grad_norm_(agent.parameters(), 0.5) #Might want to tune the 0.5. maybe.
# optimizer.step()
# #y_pred, y_true = b_values, b_returns
# #var_y = np.var(y_true)
# #explained_var = np.nan if var_y == 0 else 1 - np.var(y_pred - y_true) / var_y
# writer.add_scalar("losses/value_loss", v_loss.item(), global_step)
# writer.add_scalar("losses/policy_loss", pg_loss.item(), global_step)
# writer.add_scalar("losses/entropy", entropy_loss.item(), global_step)
# logger.info(f"global step: {global_step}, value_loss: {v_loss.item()}")
# logger.info(f"global step: {global_step}, policy_loss: {pg_loss.item()}")
# logger.info(f"global step: {global_step}, entropy: {entropy_loss.item()}")
# #Checkpoint
# if update % checkpoint_every_k_updates == 0:
# logger.info("Saving checkpoint")
# folder_path = f"checkpoints/{run_name}/{global_step}/"
# os.makedirs(folder_path, exist_ok = True)
# torch.save(agent.state_dict(), os.path.join(folder_path, "agent.pt"))
# torch.save(optimizer.state_dict(), os.path.join(folder_path, "optimizer.pt"))
\ No newline at end of file
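
For reference, a minimal sketch of how the new eval entry point could be driven programmatically. Only --red and --use_single_network are defined in this file; the other flag names are assumed to come from the shared get_config() parser in configs/config.py, and the checkpoint path is a placeholder rather than a path from this repository:

# Hypothetical invocation; assumes the repository root is on sys.path and that
# get_config() defines the usual env/algorithm/experiment/model_dir/eval flags.
from multiagent_rl_eval import main

main([
    "--env_name", "scontrol",           # the only environment make_train_env/make_eval_env handle
    "--algorithm_name", "happo",
    "--experiment_name", "check",
    "--use_eval",                       # so make_eval_env() is actually constructed
    "--model_dir", "/path/to/models",   # overrides the default hard-coded in this commit
    "--eval_episodes", "2",
    "--red", "no",
])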