Skip to content

Commit

Permalink
Merge pull request #184 from Replicable-MARL/rllib_1.8.0_dev
Browse files Browse the repository at this point in the history
Rllib 1.8.0 dev
  • Loading branch information
Theohhhu committed Sep 5, 2023
2 parents aea618a + bd3c560 commit d9d4c62
Show file tree
Hide file tree
Showing 20 changed files with 263 additions and 15 deletions.
5 changes: 5 additions & 0 deletions marllib/envs/base_env/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,12 @@
ENV_REGISTRY["mpe"] = str(e)


# Register the gymnasium-based MPE wrapper. If its dependencies are missing
# (e.g. newer pettingzoo/gymnasium not installed), record the error text in
# the registry so users see a readable message instead of an import crash.
try:
    from marllib.envs.base_env.gymnasium_mpe import RLlibMPE_Gymnasium

    ENV_REGISTRY["gymnasium_mpe"] = RLlibMPE_Gymnasium
except Exception as err:
    ENV_REGISTRY["gymnasium_mpe"] = str(err)

try:
from marllib.envs.base_env.mamujoco import RLlibMAMujoco
Expand Down
33 changes: 33 additions & 0 deletions marllib/envs/base_env/config/gymnasium_mpe.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# MIT License

# Copyright (c) 2023 Replicable-MARL

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

env: gymnasium_mpe # latest pettingzoo with gymnasium API

env_args:
  map_name: "simple_adversary" # scenario name; see REGISTRY in gymnasium_mpe.py for all options
  continuous_actions: False # use Discrete action spaces
  max_cycles: 25 # episode length (truncation horizon)

mask_flag: False # no action masking in MPE
global_state_flag: False # no separate global state is provided
opp_action_in_cc: True # centralized critics also receive opponent actions
agent_level_batch_update: False
154 changes: 154 additions & 0 deletions marllib/envs/base_env/gymnasium_mpe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
# MIT License

# Copyright (c) 2023 Replicable-MARL

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from ray.rllib.env.multi_agent_env import MultiAgentEnv
from gym.spaces import Dict as GymDict, Discrete, Box
import supersuit as ss
import time

# pettingzoo 1.23.0
from pettingzoo.mpe import simple_adversary_v3, simple_crypto_v3, simple_push_v3, simple_spread_v3, simple_tag_v3, \
simple_reference_v3, simple_world_comm_v3, simple_speaker_listener_v4

# Map scenario name -> PettingZoo parallel-env constructor (pettingzoo >= 1.23,
# gymnasium API). Keys are the values accepted for env_args["map_name"].
REGISTRY = {
    "simple_adversary": simple_adversary_v3.parallel_env,
    "simple_crypto": simple_crypto_v3.parallel_env,
    "simple_push": simple_push_v3.parallel_env,
    "simple_tag": simple_tag_v3.parallel_env,
    "simple_spread": simple_spread_v3.parallel_env,
    "simple_reference": simple_reference_v3.parallel_env,
    "simple_world_comm": simple_world_comm_v3.parallel_env,
    "simple_speaker_listener": simple_speaker_listener_v4.parallel_env,
}


# Per-scenario policy-mapping metadata consumed by MARLlib's policy builder:
#   description           - human-readable summary of the scenario
#   team_prefix           - agent-name prefixes, one entry per team
#   all_agents_one_policy - True if a single shared policy is valid for all agents
#   one_agent_one_policy  - True if one policy per agent is a valid configuration
policy_mapping_dict = {
    "simple_adversary": {
        "description": "one team attack, one team survive",
        "team_prefix": ("adversary_", "agent_"),
        "all_agents_one_policy": False,
        "one_agent_one_policy": True,
    },
    "simple_crypto": {
        "description": "two team cooperate, one team attack",
        "team_prefix": ("eve_", "bob_", "alice_"),
        "all_agents_one_policy": False,
        "one_agent_one_policy": True,
    },
    "simple_push": {
        "description": "one team target on landmark, one team attack",
        "team_prefix": ("adversary_", "agent_",),
        "all_agents_one_policy": False,
        "one_agent_one_policy": True,
    },
    "simple_tag": {
        "description": "one team attack, one team survive",
        "team_prefix": ("adversary_", "agent_"),
        "all_agents_one_policy": False,
        "one_agent_one_policy": True,
    },
    "simple_spread": {
        "description": "one team cooperate",
        "team_prefix": ("agent_",),
        "all_agents_one_policy": True,
        "one_agent_one_policy": True,
    },
    "simple_reference": {
        "description": "one team cooperate",
        "team_prefix": ("agent_",),
        "all_agents_one_policy": True,
        "one_agent_one_policy": True,
    },
    "simple_world_comm": {
        "description": "two team cooperate and attack, one team survive",
        "team_prefix": ("adversary_", "leadadversary_", "agent_"),
        "all_agents_one_policy": False,
        "one_agent_one_policy": True,
    },
    "simple_speaker_listener": {
        "description": "two team cooperate",
        "team_prefix": ("speaker_", "listener_"),
        "all_agents_one_policy": True,
        "one_agent_one_policy": True,
    },
}


class RLlibMPE_Gymnasium(MultiAgentEnv):
    """RLlib MultiAgentEnv wrapper for gymnasium-era PettingZoo MPE parallel envs.

    Observations are exposed per agent as ``{"obs": Box}``; supersuit padding
    keeps observation and action dimensions identical across agents, so a
    single shared ``Discrete`` action space and ``Box`` observation space
    describe every agent.
    """

    def __init__(self, env_config):
        """Build the scenario named by env_config["map_name"].

        All remaining keys of ``env_config`` are forwarded verbatim to the
        scenario constructor (e.g. ``continuous_actions``, ``max_cycles``).
        """
        # Temporarily remove map_name so the rest can be splatted into the
        # constructor; it is restored below so env_config round-trips.
        map_name = env_config.pop("map_name")
        env = REGISTRY[map_name](**env_config)

        # Keep obs and action dim the same across agents;
        # pad_action_space_v0 auto-masks the padded actions.
        env = ss.pad_observations_v0(env)
        env = ss.pad_action_space_v0(env)

        self.env = env
        # Read one agent's (padded, hence shared) spaces WITHOUT mutating the
        # space dicts. The previous popitem() calls destructively removed
        # entries and mixed the shape and dtype of two different agents.
        act_space = next(iter(self.env.action_spaces.values()))
        obs_space = next(iter(self.env.observation_spaces.values()))
        self.action_space = Discrete(act_space.n)
        self.observation_space = GymDict({"obs": Box(
            low=-100.0,
            high=100.0,
            shape=(obs_space.shape[0],),
            dtype=obs_space.dtype)})
        self.agents = self.env.possible_agents
        self.num_agents = len(self.agents)
        env_config["map_name"] = map_name
        self.env_config = env_config

    def reset(self):
        """Reset the env and return per-agent Dict observations.

        Gymnasium-style reset returns ``(obs_dict, info_dict)``; the info
        dict is discarded because RLlib's old API expects only observations.
        """
        original_obs, _ = self.env.reset()
        return {agent: {"obs": original_obs[agent]} for agent in self.agents}

    def step(self, action_dict):
        """Step all agents; returns (obs, rewards, dones, info) RLlib-style."""
        o, r, d, t, info = self.env.step(action_dict)
        obs = {agent: {"obs": o[agent]} for agent in self.agents}
        rewards = {agent: r[agent] for agent in self.agents}
        # Episode is over when every agent is terminated or truncated (in MPE
        # all agents truncate together at max_cycles). The previous
        # popitem()-based check read one arbitrary agent and destructively
        # emptied the terminated/truncated dicts in the process.
        dones = {"__all__": all(d.values()) or all(t.values())}
        return obs, rewards, dones, info

    def close(self):
        """Release the underlying PettingZoo env's resources."""
        self.env.close()

    def render(self, mode=None):
        # Slow rendering down slightly so it is watchable in real time.
        self.env.render()
        time.sleep(0.05)
        return True

    def get_env_info(self):
        """Return the metadata MARLlib needs to configure training."""
        env_info = {
            "space_obs": self.observation_space,
            "space_act": self.action_space,
            "num_agents": self.num_agents,
            # Honor a configured horizon; MPE's default max_cycles is 25.
            "episode_limit": self.env_config.get("max_cycles", 25),
            "policy_mapping_info": policy_mapping_dict
        }
        return env_info
1 change: 1 addition & 0 deletions marllib/envs/base_env/mpe.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
simple_spread_v2, simple_reference_v2, simple_world_comm_v2, simple_speaker_listener_v3
import time

# pettingzoo 1.12.0
REGISTRY = {}
REGISTRY["simple_adversary"] = simple_adversary_v2.parallel_env
REGISTRY["simple_crypto"] = simple_crypto_v2.parallel_env
Expand Down
7 changes: 7 additions & 0 deletions marllib/envs/global_reward_env/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@
except Exception as e:
COOP_ENV_REGISTRY["mpe"] = str(e)

# Register the fully-cooperative gymnasium MPE wrapper; on failure store the
# error text so the registry reports why the env is unavailable.
try:
    from marllib.envs.global_reward_env.gymnasium_mpe_fcoop import RLlibMPE_Gymnasium_FCOOP

    COOP_ENV_REGISTRY["gymnasium_mpe"] = RLlibMPE_Gymnasium_FCOOP
except Exception as err:
    COOP_ENV_REGISTRY["gymnasium_mpe"] = str(err)

try:
from marllib.envs.global_reward_env.magent_fcoop import RLlibMAgent_FCOOP

Expand Down
48 changes: 48 additions & 0 deletions marllib/envs/global_reward_env/gymnasium_mpe_fcoop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# MIT License

# Copyright (c) 2023 Replicable-MARL

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from marllib.envs.base_env.gymnasium_mpe import RLlibMPE_Gymnasium

# Only the fully-cooperative MPE scenarios make sense for shared-reward training.
legal_scenarios = ["simple_spread", "simple_reference", "simple_speaker_listener"]


class RLlibMPE_Gymnasium_FCOOP(RLlibMPE_Gymnasium):
    """Fully-cooperative MPE variant: every agent receives the team-mean reward."""

    def __init__(self, env_config):
        # Fail fast on competitive scenarios, where reward sharing is invalid.
        if env_config["map_name"] not in legal_scenarios:
            raise ValueError(
                "map_name must be one of: simple_spread, simple_reference, "
                "simple_speaker_listener")
        super().__init__(env_config)

    def step(self, action_dict):
        """Step the env, replacing individual rewards with the team average."""
        o, r, d, t, info = self.env.step(action_dict)
        team_reward = sum(r.values()) / self.num_agents
        obs = {agent: {"obs": o[agent]} for agent in self.agents}
        rewards = {agent: team_reward for agent in self.agents}
        # Episode ends when all agents are terminated or truncated. The
        # previous popitem()-based check inspected one arbitrary agent and
        # destructively emptied the terminated/truncated dicts.
        dones = {"__all__": all(d.values()) or all(t.values())}
        return obs, rewards, dones, info
2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/coma.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ algo_args:
lambda: 1.0
vf_loss_coeff: 1.0
batch_episode: 128
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"
lr: 0.0005
entropy_coeff: 0.01
2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/facmac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,6 @@ algo_args:
buffer_size_episode: 1000
target_network_update_freq_episode: 1
tau: 0.002
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"
mixer: "qmix" # qmix or vdn

2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/happo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,4 @@ algo_args:
entropy_coeff: 0.01
vf_clip_param: 10.0
min_lr_schedule: 1e-11
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"
2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/hatrpo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ algo_args:
vf_loss_coeff: 1.0
entropy_coeff: 0.01
vf_clip_param: 10.0
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"
kl_threshold: 0.00001
accept_ratio: 0.5
critic_lr: 0.0005
2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/ia2c.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ algo_args:
lambda: 1.0
vf_loss_coeff: 1.0
batch_episode: 128
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"
lr: 0.0005
entropy_coeff: 0.01
2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/iddpg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,5 @@ algo_args:
buffer_size_episode: 1000
target_network_update_freq_episode: 1
tau: 0.002
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"

2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/ippo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,5 @@ algo_args:
entropy_coeff: 0.01
clip_param: 0.3
vf_clip_param: 20.0
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"

2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/itrpo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ algo_args:
vf_loss_coeff: 1.0
entropy_coeff: 0.01
vf_clip_param: 10.0
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"
kl_threshold: 0.00001
accept_ratio: 0.5
critic_lr: 0.0005
4 changes: 2 additions & 2 deletions marllib/marl/algos/hyperparams/finetuned/mpe/maa2c.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ algo_args:
lambda: 1.0
vf_loss_coeff: 1.0
batch_episode: 128
batch_mode: "truncate_episodes"
lr: 0.0005
batch_mode: "complete_episodes"
lr: 0.0003
entropy_coeff: 0.01
2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/maddpg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,5 @@ algo_args:
buffer_size_episode: 10000
target_network_update_freq_episode: 1
tau: 0.002
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"

2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/mappo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,6 @@ algo_args:
entropy_coeff: 0.01
clip_param: 0.3
vf_clip_param: 20.0
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"


2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/matrpo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ algo_args:
vf_loss_coeff: 1.0
entropy_coeff: 0.01
vf_clip_param: 10.0
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"
kl_threshold: 0.00001
accept_ratio: 0.5
critic_lr: 0.0005
2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/vda2c.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ algo_args:
lambda: 1.0
vf_loss_coeff: 1.0
batch_episode: 128
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"
lr: 0.0005
entropy_coeff: 0.01
mixer: "qmix" # vdn
2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/vdppo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,5 @@ algo_args:
entropy_coeff: 0.01
clip_param: 0.3
vf_clip_param: 20.0
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"
mixer: "qmix" # qmix or vdn

0 comments on commit d9d4c62

Please sign in to comment.