Emil Karlsson / sensor-control

Commit 4978b3ba
authored 2 years ago by Dennis Malmgren

Working on it

parent e3fd9686
Branch containing commit: mdp_analysis

Showing 4 changed files
configs/config.py               2 additions, 2 deletions
multiagent_rl_train.py         20 additions, 20 deletions
scalarized_reward_wrapper.py   15 additions, 7 deletions
scenario.py                     2 additions, 2 deletions
with 39 additions and 31 deletions
configs/config.py  +2 −2

@@ -184,7 +184,7 @@ def get_config():
     parser.add_argument("--n_training_threads", type=int, default=1, help="Number of torch threads for training")
-    parser.add_argument("--n_rollout_threads", type=int, default=20, help="Number of parallel envs for training rollouts")
+    parser.add_argument("--n_rollout_threads", type=int, default=30, help="Number of parallel envs for training rollouts")
     parser.add_argument("--n_eval_rollout_threads", type=int, default=1, help="Number of parallel envs for evaluating rollouts")
     parser.add_argument("--n_render_rollout_threads", type=int, ...
@@ -267,7 +267,7 @@ def get_config():
     parser.add_argument("--clip_param", type=float, default=0.2, help='ppo clip parameter (default: 0.2)')
-    parser.add_argument("--num_mini_batch", type=int, default=20, help='number of batches for ppo (default: 1)')
+    parser.add_argument("--num_mini_batch", type=int, default=30, help='number of batches for ppo (default: 1)')
     parser.add_argument("--entropy_coef", type=float, default=0.01, help='entropy term coefficient (default: 0.01)')
     parser.add_argument("--value_loss_coef", type=float, ...
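For context, a minimal sketch of how the updated defaults surface at runtime. It assumes get_config() returns the configured argparse parser (as the add_argument calls suggest), that configs is importable as a package, and that no flags are passed on the command line; it is not code from this repository.

# Hedged sketch: the new defaults apply when nothing is passed on the command line.
from configs.config import get_config

parser = get_config()              # assumed to return the ArgumentParser built above
all_args = parser.parse_args([])   # empty argv, so the defaults apply
assert all_args.n_rollout_threads == 30   # changed from 20 in this commit
assert all_args.num_mini_batch == 30      # changed from 20 in this commit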
multiagent_rl_train.py  +20 −20
@@ -70,17 +70,17 @@ def make_train_env(all_args):
     # }
     reward_wrapper_config = {
         # values= weights, only non zero weights will count to reward
-        "aircraft_share_detected": 0.01,  # green detect red without
-        "share_delay_detected": 0.01,  # After 300 timesteps, how many red agents detected
-        "share_radar_actions": 0.01,  # Number of times radar used
-        "radar_share_detected": 0.01,  # green detect red with radar
+        "aircraft_share_detected": 0.0,  # green detect red without
+        "share_delay_detected": 0.0,  # After 300 timesteps, how many red agents detected
+        "share_radar_actions": 0.0,  # Number of times radar used
+        "radar_share_detected": 0.0,  # green detect red with radar
         "position_error": 0.0001,  #
         "position_delay_error": 0.0001,  #After 300 timesteps,
-        'enemy_aircraft_share_detected': 0.01,
-        "enemy_share_delay_detected": 0.01,
-        "enemy_share_radar_actions": 0.01,
-        "enemy_radar_share_detected": 0.01,
+        'enemy_aircraft_share_detected': 0.0,
+        "enemy_share_delay_detected": 0.0,
+        "enemy_share_radar_actions": 0.0,
+        "enemy_radar_share_detected": 0.0,
         "enemy_position_error": 0.0001,
         "enemy_position_delay_error": 0.0001}
@@ -124,19 +124,19 @@ def make_eval_env(all_args):
     reward_wrapper_config = {
         # values= weights, only non zero weights will count to reward
-        "aircraft_share_detected": 1,  # green detect red without
-        "share_delay_detected": 1,  # After 300 timesteps, how many red agents detected
-        "share_radar_actions": 1,  # Number of times radar used
-        "radar_share_detected": 1,  # green detect red with radar
-        "position_error": 1,  #
-        "position_delay_error": 1,  #After 300 timesteps,
+        "aircraft_share_detected": 0.0,  # green detect red without
+        "share_delay_detected": 0.0,  # After 300 timesteps, how many red agents detected
+        "share_radar_actions": 0.0,  # Number of times radar used
+        "radar_share_detected": 0.0,  # green detect red with radar
+        "position_error": 0.0001,  #
+        "position_delay_error": 0.0001,  #After 300 timesteps,
-        'enemy_aircraft_share_detected': 1,
-        "enemy_share_delay_detected": 1,
-        "enemy_share_radar_actions": 1,
-        "enemy_radar_share_detected": 1,
-        "enemy_position_error": 1,
-        "enemy_position_delay_error": 1}
+        'enemy_aircraft_share_detected': 0.0,
+        "enemy_share_delay_detected": 0.0,
+        "enemy_share_radar_actions": 0.0,
+        "enemy_radar_share_detected": 0.0,
+        "enemy_position_error": 0.0001,
+        "enemy_position_delay_error": 0.0001}
     red_behaviour_name = all_args.red_behaviour
     rand_red_behaviour = all_args.rand_red_behaviour
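The in-line comment says the values act as weights and that only non-zero weights count toward the reward. For reference, a weighted scalarization consistent with that comment could look like the sketch below; this is an assumption about how a dict like reward_wrapper_config might be consumed (weighted_scalarize is a hypothetical helper, not in the repository), and the ScalarizedRewardWrapper in the next file hardcodes its own combination instead.

# Hedged sketch only: fold a weight dict into a per-agent scalar reward.
def weighted_scalarize(reward, weights):
    """reward: {agent_id: {objective: value}}; weights: {objective: weight}."""
    return {
        agent_id: sum(w * objectives[name]
                      for name, w in weights.items()
                      if w != 0.0)          # zero-weight objectives drop out entirely
        for agent_id, objectives in reward.items()
    }

With the training-time weights above, only the four position-error terms (weight 0.0001) would remain non-zero.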
scalarized_reward_wrapper.py  +15 −7
@@ -44,17 +44,25 @@ class ScalarizedRewardWrapper(gym.RewardWrapper):
     def reward(self, reward: MultiAgentMultiObjectiveReward) -> MultiAgentReward:
         # if self.eval_==True:
+        # position error is a negative number that increases in magnitude with the error.
+        # at the 'worst' it is -600.0
         new_reward = dict({
-            agent_id: (0.01*reward[agent_id]['enemy_position_error']-0.01*reward[agent_id]['position_error'])
-            + (0.01*reward[agent_id]['enemy_position_delay_error']-0.01*reward[agent_id]['position_delay_error'])
-            + (reward[agent_id]['aircraft_share_detected']-reward[agent_id]['enemy_aircraft_share_detected'])
-            + (reward[agent_id]['share_delay_detected']-reward[agent_id]['enemy_share_delay_detected'])
-            + (0.1*reward[agent_id]['radar_share_detected']-0.1*reward[agent_id]['enemy_radar_share_detected'])
-            + (0.1*reward[agent_id]['enemy_share_radar_actions']-0.1*reward[agent_id]['radar_share_detected'])
+            agent_id: (0.00001*(reward[agent_id]['position_error']-reward[agent_id]['enemy_position_error']))
+            + (0.00001*(reward[agent_id]['position_delay_error']-reward[agent_id]['enemy_position_delay_error']))
             for agent_id in reward
         })
         # if self.eval_==True:
         #     new_reward = dict({
         #         agent_id: (0.01*reward[agent_id]['enemy_position_error']-0.01*reward[agent_id]['position_error'])
         #         + (0.01*reward[agent_id]['enemy_position_delay_error']-0.01*reward[agent_id]['position_delay_error'])
         #         + (reward[agent_id]['aircraft_share_detected']-reward[agent_id]['enemy_aircraft_share_detected'])
         #         + (reward[agent_id]['share_delay_detected']-reward[agent_id]['enemy_share_delay_detected'])
         #         + (0.1*reward[agent_id]['radar_share_detected']-0.1*reward[agent_id]['enemy_radar_share_detected'])
         #         + (0.1*reward[agent_id]['enemy_share_radar_actions']-0.1*reward[agent_id]['radar_share_detected'])
         #         for agent_id in reward
         #     })
         # else:
         #     new_reward = dict({
         #         # agent_id: (0.01*reward[agent_id]['enemy_position_error']-0.01*reward[agent_id]['position_error'])
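As a quick sanity check of the new scalarization against the added comment that a position error bottoms out around -600.0: with toy numbers (not from a real rollout, agent_0 is a hypothetical agent id), the per-step reward stays on the order of 1e-2.

# Toy values only: one side at the stated worst of -600.0, the other side at 0.0.
reward = {
    "agent_0": {
        "position_error": -600.0,
        "enemy_position_error": 0.0,
        "position_delay_error": -600.0,
        "enemy_position_delay_error": 0.0,
    }
}
scalar = {
    agent_id: 0.00001 * (r["position_error"] - r["enemy_position_error"])
              + 0.00001 * (r["position_delay_error"] - r["enemy_position_delay_error"])
    for agent_id, r in reward.items()
}
print(scalar)  # ≈ {'agent_0': -0.012}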
scenario.py  +2 −2
@@ -203,7 +203,7 @@ def evaluate_position(state: State) -> tuple[float, float]:
     not_detected = [a_o.aircraft for a_o in state.green if a_o.aircraft.name not in detected_name]
     for not_detected_aircraft in not_detected:
-        dist = 141
+        dist = 600
         dist_red += dist
     dist_green = 0
@@ -219,7 +219,7 @@ def evaluate_position(state: State) -> tuple[float, float]:
     not_detected = [a_o.aircraft for a_o in state.red if a_o.aircraft.name not in detected_name]
     for not_detected_aircraft in not_detected:
-        dist = 141
+        dist = 600
         dist_green += dist
     dist_red = dist_red / (len(state.green) * len(state.red))
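The effect of the new sentinel in isolation, as a toy computation over only the lines visible in these hunks (the counts and variable names here are hypothetical, and detected aircraft contribute their actual distances in code not shown):

# Toy illustration of the changed penalty term only.
undetected_green = 2                      # entries in the not_detected list of the first hunk
n_green, n_red = 3, 3
dist_red = undetected_green * 600         # was undetected_green * 141 before this commit
dist_red = dist_red / (n_green * n_red)   # normalisation from the hunk above
print(dist_red)                           # 133.33..., versus 31.33... with the old value of 141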