diff --git a/marllib/envs/base_env/__init__.py b/marllib/envs/base_env/__init__.py index 8c05b21b..26c22146 100644 --- a/marllib/envs/base_env/__init__.py +++ b/marllib/envs/base_env/__init__.py @@ -38,7 +38,12 @@ ENV_REGISTRY["mpe"] = str(e) +try: + from marllib.envs.base_env.gymnasium_mpe import RLlibMPE_Gymnasium + ENV_REGISTRY["gymnasium_mpe"] = RLlibMPE_Gymnasium +except Exception as e: + ENV_REGISTRY["gymnasium_mpe"] = str(e) try: from marllib.envs.base_env.mamujoco import RLlibMAMujoco diff --git a/marllib/envs/base_env/config/gymnasium_mpe.yaml b/marllib/envs/base_env/config/gymnasium_mpe.yaml new file mode 100644 index 00000000..d2a6dee7 --- /dev/null +++ b/marllib/envs/base_env/config/gymnasium_mpe.yaml @@ -0,0 +1,33 @@ +# MIT License + +# Copyright (c) 2023 Replicable-MARL + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +env: gymnasium_mpe # latest pettingzoo with gymnasium + +env_args: + map_name: "simple_adversary" # others can be found in mpe.py + continuous_actions: False + max_cycles: 25 + +mask_flag: False +global_state_flag: False +opp_action_in_cc: True +agent_level_batch_update: False \ No newline at end of file diff --git a/marllib/envs/base_env/gymnasium_mpe.py b/marllib/envs/base_env/gymnasium_mpe.py new file mode 100644 index 00000000..2f82aeec --- /dev/null +++ b/marllib/envs/base_env/gymnasium_mpe.py @@ -0,0 +1,154 @@ +# MIT License + +# Copyright (c) 2023 Replicable-MARL + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
# Maps a scenario name (the "map_name" config value) to the constructor of the
# matching PettingZoo MPE parallel environment (pettingzoo 1.23.0 versions).
REGISTRY = {
    "simple_adversary": simple_adversary_v3.parallel_env,
    "simple_crypto": simple_crypto_v3.parallel_env,
    "simple_push": simple_push_v3.parallel_env,
    "simple_tag": simple_tag_v3.parallel_env,
    "simple_spread": simple_spread_v3.parallel_env,
    "simple_reference": simple_reference_v3.parallel_env,
    "simple_world_comm": simple_world_comm_v3.parallel_env,
    "simple_speaker_listener": simple_speaker_listener_v4.parallel_env,
}
class RLlibMPE_Gymnasium(MultiAgentEnv):
    """RLlib ``MultiAgentEnv`` wrapper for the PettingZoo MPE parallel environments.

    The scenario is selected by ``env_config["map_name"]`` from ``REGISTRY``;
    all other ``env_config`` entries are passed to the scenario constructor.
    SuperSuit padding is applied so that observation and action dimensions are
    the same across agents, which lets the wrapper expose a single
    ``observation_space`` / ``action_space`` pair.
    """

    def __init__(self, env_config):
        """Create the scenario, pad its spaces and derive the shared spaces.

        Args:
            env_config: dict containing ``"map_name"`` (a key of ``REGISTRY``)
                plus keyword arguments for the scenario constructor. The dict
                is stored as ``self.env_config`` and is not modified.

        Raises:
            KeyError: if ``"map_name"`` is missing or is not a ``REGISTRY`` key.
        """
        map_name = env_config["map_name"]
        # Forward everything except "map_name" through a filtered copy, so the
        # caller's dict stays intact even when the constructor below raises.
        scenario_kwargs = {
            key: value for key, value in env_config.items() if key != "map_name"
        }
        env = REGISTRY[map_name](**scenario_kwargs)

        # keep obs and action dim same across agents
        # pad_action_space_v0 will auto mask the padding actions
        env = ss.pad_observations_v0(env)
        env = ss.pad_action_space_v0(env)

        self.env = env
        # Read one representative space per kind WITHOUT popitem(): popping
        # would delete entries from the mappings exposed by the wrapped env.
        # NOTE(review): relies on the padding above making all agents' spaces
        # equivalent, so which agent is sampled does not matter.
        sample_act_space = list(self.env.action_spaces.values())[-1]
        sample_obs_space = list(self.env.observation_spaces.values())[-1]

        # Assumes a discrete action space (".n"); a continuous-action scenario
        # would fail here -- TODO confirm whether that case must be supported.
        self.action_space = Discrete(sample_act_space.n)
        self.observation_space = GymDict({"obs": Box(
            low=-100.0,
            high=100.0,
            shape=(sample_obs_space.shape[0],),
            dtype=sample_obs_space.dtype)})
        self.agents = self.env.possible_agents
        self.num_agents = len(self.agents)
        self.env_config = env_config

    @staticmethod
    def _episode_done(terminations, truncations):
        """Return True when every reported agent is terminated or truncated.

        Neither dict is modified. Agents missing from one of the dicts count
        as "not flagged" there; two empty dicts yield True.
        """
        agent_ids = set(terminations) | set(truncations)
        return all(
            bool(terminations.get(agent, False) or truncations.get(agent, False))
            for agent in agent_ids
        )

    def reset(self):
        """Reset the wrapped env.

        Returns:
            dict mapping each agent in ``self.agents`` to ``{"obs": observation}``.
        """
        # Element 0 of the wrapped reset() result holds the per-agent observations.
        reset_result = self.env.reset()
        observations = reset_result[0]
        return {agent: {"obs": observations[agent]} for agent in self.agents}

    def step(self, action_dict):
        """Advance the wrapped env by one step.

        Args:
            action_dict: per-agent actions, passed to the wrapped env unchanged.

        Returns:
            Tuple ``(obs, rewards, dones, info)`` where ``obs`` maps each agent
            to ``{"obs": observation}``, ``rewards`` maps each agent to its
            reward, ``dones`` only contains the ``"__all__"`` flag and ``info``
            is returned as produced by the wrapped env.
        """
        observations, raw_rewards, terminations, truncations, info = \
            self.env.step(action_dict)
        rewards = {agent: raw_rewards[agent] for agent in self.agents}
        obs = {agent: {"obs": observations[agent]} for agent in self.agents}
        # The episode is over once all agents are terminated or truncated,
        # instead of trusting the flag of one arbitrary (popped) agent.
        dones = {"__all__": self._episode_done(terminations, truncations)}
        return obs, rewards, dones, info

    def close(self):
        """Close the wrapped env."""
        self.env.close()

    def render(self, mode=None):
        """Render the wrapped env, pause for 0.05 s and return True.

        ``mode`` is accepted for interface compatibility but is not used.
        """
        self.env.render()
        time.sleep(0.05)
        return True

    def get_env_info(self):
        """Return the spaces, agent count, episode limit and policy mapping.

        ``episode_limit`` follows the configured ``max_cycles`` and falls back
        to 25 (the value that used to be hard-coded) when it is not set.
        """
        env_info = {
            "space_obs": self.observation_space,
            "space_act": self.action_space,
            "num_agents": self.num_agents,
            "episode_limit": self.env_config.get("max_cycles", 25),
            "policy_mapping_info": policy_mapping_dict
        }
        return env_info
class RLlibMPE_Gymnasium_FCOOP(RLlibMPE_Gymnasium):
    """Global-reward variant of ``RLlibMPE_Gymnasium``.

    Only the scenarios listed in ``legal_scenarios`` are accepted. At every
    step each agent receives the same reward: the sum of all rewards reported
    by the wrapped env divided by ``self.num_agents``.
    """

    def __init__(self, env_config):
        """Validate the scenario name, then build the env via the base class.

        Args:
            env_config: dict with a ``"map_name"`` entry; passed on unchanged
                to ``RLlibMPE_Gymnasium.__init__``.

        Raises:
            KeyError: if ``"map_name"`` is missing from ``env_config``.
            ValueError: if the scenario is not in ``legal_scenarios``.
        """
        if env_config["map_name"] not in legal_scenarios:
            raise ValueError("must in: 1.simple_spread, 2.simple_reference, 3.simple_speaker_listener")
        super().__init__(env_config)

    def step(self, action_dict):
        """Advance the wrapped env by one step and share the reward.

        Args:
            action_dict: per-agent actions, passed to the wrapped env unchanged.

        Returns:
            Tuple ``(obs, rewards, dones, info)`` where ``obs`` maps each agent
            to ``{"obs": observation}``, ``rewards`` maps each agent to the
            shared average reward, ``dones`` only contains the ``"__all__"``
            flag and ``info`` is returned as produced by the wrapped env.
        """
        observations, raw_rewards, terminations, truncations, info = \
            self.env.step(action_dict)
        total_reward = sum(raw_rewards.values())
        # The division stays inside the comprehension so that an empty agent
        # list never triggers a division by zero.
        rewards = {agent: total_reward / self.num_agents for agent in self.agents}
        obs = {agent: {"obs": observations[agent]} for agent in self.agents}
        # The episode is over once all agents are terminated or truncated,
        # instead of trusting the flag of one arbitrary (popped) agent; the
        # dicts returned by the env are left unmodified.
        agent_ids = set(terminations) | set(truncations)
        all_done = all(
            bool(terminations.get(agent, False) or truncations.get(agent, False))
            for agent in agent_ids
        )
        dones = {"__all__": all_done}
        return obs, rewards, dones, info
b/marllib/marl/algos/hyperparams/finetuned/mpe/facmac.yaml @@ -36,6 +36,6 @@ algo_args: buffer_size_episode: 1000 target_network_update_freq_episode: 1 tau: 0.002 - batch_mode: "truncate_episodes" + batch_mode: "complete_episodes" mixer: "qmix" # qmix or vdn diff --git a/marllib/marl/algos/hyperparams/finetuned/mpe/happo.yaml b/marllib/marl/algos/hyperparams/finetuned/mpe/happo.yaml index afef9151..4ab06ad1 100644 --- a/marllib/marl/algos/hyperparams/finetuned/mpe/happo.yaml +++ b/marllib/marl/algos/hyperparams/finetuned/mpe/happo.yaml @@ -38,4 +38,4 @@ algo_args: entropy_coeff: 0.01 vf_clip_param: 10.0 min_lr_schedule: 1e-11 - batch_mode: "truncate_episodes" + batch_mode: "complete_episodes" diff --git a/marllib/marl/algos/hyperparams/finetuned/mpe/hatrpo.yaml b/marllib/marl/algos/hyperparams/finetuned/mpe/hatrpo.yaml index a0d81929..588d1ed3 100644 --- a/marllib/marl/algos/hyperparams/finetuned/mpe/hatrpo.yaml +++ b/marllib/marl/algos/hyperparams/finetuned/mpe/hatrpo.yaml @@ -34,7 +34,7 @@ algo_args: vf_loss_coeff: 1.0 entropy_coeff: 0.01 vf_clip_param: 10.0 - batch_mode: "truncate_episodes" + batch_mode: "complete_episodes" kl_threshold: 0.00001 accept_ratio: 0.5 critic_lr: 0.0005 diff --git a/marllib/marl/algos/hyperparams/finetuned/mpe/ia2c.yaml b/marllib/marl/algos/hyperparams/finetuned/mpe/ia2c.yaml index 740c54a3..46f23425 100644 --- a/marllib/marl/algos/hyperparams/finetuned/mpe/ia2c.yaml +++ b/marllib/marl/algos/hyperparams/finetuned/mpe/ia2c.yaml @@ -29,6 +29,6 @@ algo_args: lambda: 1.0 vf_loss_coeff: 1.0 batch_episode: 128 - batch_mode: "truncate_episodes" + batch_mode: "complete_episodes" lr: 0.0005 entropy_coeff: 0.01 diff --git a/marllib/marl/algos/hyperparams/finetuned/mpe/iddpg.yaml b/marllib/marl/algos/hyperparams/finetuned/mpe/iddpg.yaml index 31446e12..7fce7f11 100644 --- a/marllib/marl/algos/hyperparams/finetuned/mpe/iddpg.yaml +++ b/marllib/marl/algos/hyperparams/finetuned/mpe/iddpg.yaml @@ -36,5 +36,5 @@ algo_args: buffer_size_episode: 1000 
target_network_update_freq_episode: 1 tau: 0.002 - batch_mode: "truncate_episodes" + batch_mode: "complete_episodes" diff --git a/marllib/marl/algos/hyperparams/finetuned/mpe/ippo.yaml b/marllib/marl/algos/hyperparams/finetuned/mpe/ippo.yaml index aed51641..79fd522e 100644 --- a/marllib/marl/algos/hyperparams/finetuned/mpe/ippo.yaml +++ b/marllib/marl/algos/hyperparams/finetuned/mpe/ippo.yaml @@ -35,5 +35,5 @@ algo_args: entropy_coeff: 0.01 clip_param: 0.3 vf_clip_param: 20.0 - batch_mode: "truncate_episodes" + batch_mode: "complete_episodes" diff --git a/marllib/marl/algos/hyperparams/finetuned/mpe/itrpo.yaml b/marllib/marl/algos/hyperparams/finetuned/mpe/itrpo.yaml index b349fc56..28acb9cc 100644 --- a/marllib/marl/algos/hyperparams/finetuned/mpe/itrpo.yaml +++ b/marllib/marl/algos/hyperparams/finetuned/mpe/itrpo.yaml @@ -34,7 +34,7 @@ algo_args: vf_loss_coeff: 1.0 entropy_coeff: 0.01 vf_clip_param: 10.0 - batch_mode: "truncate_episodes" + batch_mode: "complete_episodes" kl_threshold: 0.00001 accept_ratio: 0.5 critic_lr: 0.0005 diff --git a/marllib/marl/algos/hyperparams/finetuned/mpe/maa2c.yaml b/marllib/marl/algos/hyperparams/finetuned/mpe/maa2c.yaml index 74dccc18..08ce1c22 100644 --- a/marllib/marl/algos/hyperparams/finetuned/mpe/maa2c.yaml +++ b/marllib/marl/algos/hyperparams/finetuned/mpe/maa2c.yaml @@ -29,6 +29,6 @@ algo_args: lambda: 1.0 vf_loss_coeff: 1.0 batch_episode: 128 - batch_mode: "truncate_episodes" - lr: 0.0005 + batch_mode: "complete_episodes" + lr: 0.0003 entropy_coeff: 0.01 diff --git a/marllib/marl/algos/hyperparams/finetuned/mpe/maddpg.yaml b/marllib/marl/algos/hyperparams/finetuned/mpe/maddpg.yaml index 08e41a0e..7cc1997d 100644 --- a/marllib/marl/algos/hyperparams/finetuned/mpe/maddpg.yaml +++ b/marllib/marl/algos/hyperparams/finetuned/mpe/maddpg.yaml @@ -36,5 +36,5 @@ algo_args: buffer_size_episode: 10000 target_network_update_freq_episode: 1 tau: 0.002 - batch_mode: "truncate_episodes" + batch_mode: "complete_episodes" diff --git 
a/marllib/marl/algos/hyperparams/finetuned/mpe/mappo.yaml b/marllib/marl/algos/hyperparams/finetuned/mpe/mappo.yaml index 823705a1..e5f13fc5 100644 --- a/marllib/marl/algos/hyperparams/finetuned/mpe/mappo.yaml +++ b/marllib/marl/algos/hyperparams/finetuned/mpe/mappo.yaml @@ -35,6 +35,6 @@ algo_args: entropy_coeff: 0.01 clip_param: 0.3 vf_clip_param: 20.0 - batch_mode: "truncate_episodes" + batch_mode: "complete_episodes" diff --git a/marllib/marl/algos/hyperparams/finetuned/mpe/matrpo.yaml b/marllib/marl/algos/hyperparams/finetuned/mpe/matrpo.yaml index d1b2e0cd..8d0b31bf 100644 --- a/marllib/marl/algos/hyperparams/finetuned/mpe/matrpo.yaml +++ b/marllib/marl/algos/hyperparams/finetuned/mpe/matrpo.yaml @@ -34,7 +34,7 @@ algo_args: vf_loss_coeff: 1.0 entropy_coeff: 0.01 vf_clip_param: 10.0 - batch_mode: "truncate_episodes" + batch_mode: "complete_episodes" kl_threshold: 0.00001 accept_ratio: 0.5 critic_lr: 0.0005 diff --git a/marllib/marl/algos/hyperparams/finetuned/mpe/vda2c.yaml b/marllib/marl/algos/hyperparams/finetuned/mpe/vda2c.yaml index 7053131f..e11990b1 100644 --- a/marllib/marl/algos/hyperparams/finetuned/mpe/vda2c.yaml +++ b/marllib/marl/algos/hyperparams/finetuned/mpe/vda2c.yaml @@ -29,7 +29,7 @@ algo_args: lambda: 1.0 vf_loss_coeff: 1.0 batch_episode: 128 - batch_mode: "truncate_episodes" + batch_mode: "complete_episodes" lr: 0.0005 entropy_coeff: 0.01 mixer: "qmix" # vdn diff --git a/marllib/marl/algos/hyperparams/finetuned/mpe/vdppo.yaml b/marllib/marl/algos/hyperparams/finetuned/mpe/vdppo.yaml index cdfba66e..4e755e80 100644 --- a/marllib/marl/algos/hyperparams/finetuned/mpe/vdppo.yaml +++ b/marllib/marl/algos/hyperparams/finetuned/mpe/vdppo.yaml @@ -35,5 +35,5 @@ algo_args: entropy_coeff: 0.01 clip_param: 0.3 vf_clip_param: 20.0 - batch_mode: "truncate_episodes" + batch_mode: "complete_episodes" mixer: "qmix" # qmix or vdn