Skip to content

Commit

Permalink
Merge pull request #184 from Replicable-MARL/rllib_1.8.0_dev
Browse files Browse the repository at this point in the history
Rllib 1.8.0 dev
  • Loading branch information
Theohhhu committed Sep 5, 2023
2 parents aea618a + bd3c560 commit d9d4c62
Show file tree
Hide file tree
Showing 20 changed files with 263 additions and 15 deletions.
5 changes: 5 additions & 0 deletions marllib/envs/base_env/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,12 @@
ENV_REGISTRY["mpe"] = str(e)


# Register the gymnasium-based MPE wrapper. If its dependencies are missing
# (e.g. newer pettingzoo/gymnasium not installed), record the error text in
# the registry so users see a readable message instead of an import crash.
try:
    from marllib.envs.base_env.gymnasium_mpe import RLlibMPE_Gymnasium

    ENV_REGISTRY["gymnasium_mpe"] = RLlibMPE_Gymnasium
except Exception as err:
    ENV_REGISTRY["gymnasium_mpe"] = str(err)

try:
from marllib.envs.base_env.mamujoco import RLlibMAMujoco
Expand Down
33 changes: 33 additions & 0 deletions marllib/envs/base_env/config/gymnasium_mpe.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# MIT License

# Copyright (c) 2023 Replicable-MARL

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

env: gymnasium_mpe # latest pettingzoo with gymnasium API

env_args:
  map_name: "simple_adversary" # scenario name; see REGISTRY in gymnasium_mpe.py for all options
  continuous_actions: False # use Discrete action spaces
  max_cycles: 25 # episode length (truncation horizon)

mask_flag: False # no action masking in MPE
global_state_flag: False # no separate global state is provided
opp_action_in_cc: True # centralized critics also receive opponent actions
agent_level_batch_update: False
154 changes: 154 additions & 0 deletions marllib/envs/base_env/gymnasium_mpe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
# MIT License

# Copyright (c) 2023 Replicable-MARL

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from ray.rllib.env.multi_agent_env import MultiAgentEnv
from gym.spaces import Dict as GymDict, Discrete, Box
import supersuit as ss
import time

# pettingzoo 1.23.0
from pettingzoo.mpe import simple_adversary_v3, simple_crypto_v3, simple_push_v3, simple_spread_v3, simple_tag_v3, \
simple_reference_v3, simple_world_comm_v3, simple_speaker_listener_v4

# Map scenario name -> PettingZoo parallel-env constructor (pettingzoo >= 1.23,
# gymnasium API). Keys are the values accepted for env_args["map_name"].
REGISTRY = {
    "simple_adversary": simple_adversary_v3.parallel_env,
    "simple_crypto": simple_crypto_v3.parallel_env,
    "simple_push": simple_push_v3.parallel_env,
    "simple_tag": simple_tag_v3.parallel_env,
    "simple_spread": simple_spread_v3.parallel_env,
    "simple_reference": simple_reference_v3.parallel_env,
    "simple_world_comm": simple_world_comm_v3.parallel_env,
    "simple_speaker_listener": simple_speaker_listener_v4.parallel_env,
}


# Per-scenario policy-mapping metadata consumed by MARLlib's policy builder:
#   description           - human-readable summary of the scenario
#   team_prefix           - agent-name prefixes, one entry per team
#   all_agents_one_policy - True if a single shared policy is valid for all agents
#   one_agent_one_policy  - True if one policy per agent is a valid configuration
policy_mapping_dict = {
    "simple_adversary": {
        "description": "one team attack, one team survive",
        "team_prefix": ("adversary_", "agent_"),
        "all_agents_one_policy": False,
        "one_agent_one_policy": True,
    },
    "simple_crypto": {
        "description": "two team cooperate, one team attack",
        "team_prefix": ("eve_", "bob_", "alice_"),
        "all_agents_one_policy": False,
        "one_agent_one_policy": True,
    },
    "simple_push": {
        "description": "one team target on landmark, one team attack",
        "team_prefix": ("adversary_", "agent_",),
        "all_agents_one_policy": False,
        "one_agent_one_policy": True,
    },
    "simple_tag": {
        "description": "one team attack, one team survive",
        "team_prefix": ("adversary_", "agent_"),
        "all_agents_one_policy": False,
        "one_agent_one_policy": True,
    },
    "simple_spread": {
        "description": "one team cooperate",
        "team_prefix": ("agent_",),
        "all_agents_one_policy": True,
        "one_agent_one_policy": True,
    },
    "simple_reference": {
        "description": "one team cooperate",
        "team_prefix": ("agent_",),
        "all_agents_one_policy": True,
        "one_agent_one_policy": True,
    },
    "simple_world_comm": {
        "description": "two team cooperate and attack, one team survive",
        "team_prefix": ("adversary_", "leadadversary_", "agent_"),
        "all_agents_one_policy": False,
        "one_agent_one_policy": True,
    },
    "simple_speaker_listener": {
        "description": "two team cooperate",
        "team_prefix": ("speaker_", "listener_"),
        "all_agents_one_policy": True,
        "one_agent_one_policy": True,
    },
}


class RLlibMPE_Gymnasium(MultiAgentEnv):
    """RLlib MultiAgentEnv wrapper for gymnasium-era PettingZoo MPE parallel envs.

    Observations are exposed per agent as ``{"obs": Box}``; supersuit padding
    keeps observation and action dimensions identical across agents, so a
    single shared ``Discrete`` action space and ``Box`` observation space
    describe every agent.
    """

    def __init__(self, env_config):
        """Build the scenario named by env_config["map_name"].

        All remaining keys of ``env_config`` are forwarded verbatim to the
        scenario constructor (e.g. ``continuous_actions``, ``max_cycles``).
        """
        # Temporarily remove map_name so the rest can be splatted into the
        # constructor; it is restored below so env_config round-trips.
        map_name = env_config.pop("map_name")
        env = REGISTRY[map_name](**env_config)

        # Keep obs and action dim the same across agents;
        # pad_action_space_v0 auto-masks the padded actions.
        env = ss.pad_observations_v0(env)
        env = ss.pad_action_space_v0(env)

        self.env = env
        # Read one agent's (padded, hence shared) spaces WITHOUT mutating the
        # space dicts. The previous popitem() calls destructively removed
        # entries and mixed the shape and dtype of two different agents.
        act_space = next(iter(self.env.action_spaces.values()))
        obs_space = next(iter(self.env.observation_spaces.values()))
        self.action_space = Discrete(act_space.n)
        self.observation_space = GymDict({"obs": Box(
            low=-100.0,
            high=100.0,
            shape=(obs_space.shape[0],),
            dtype=obs_space.dtype)})
        self.agents = self.env.possible_agents
        self.num_agents = len(self.agents)
        env_config["map_name"] = map_name
        self.env_config = env_config

    def reset(self):
        """Reset the env and return per-agent Dict observations.

        Gymnasium-style reset returns ``(obs_dict, info_dict)``; the info
        dict is discarded because RLlib's old API expects only observations.
        """
        original_obs, _ = self.env.reset()
        return {agent: {"obs": original_obs[agent]} for agent in self.agents}

    def step(self, action_dict):
        """Step all agents; returns (obs, rewards, dones, info) RLlib-style."""
        o, r, d, t, info = self.env.step(action_dict)
        obs = {agent: {"obs": o[agent]} for agent in self.agents}
        rewards = {agent: r[agent] for agent in self.agents}
        # Episode is over when every agent is terminated or truncated (in MPE
        # all agents truncate together at max_cycles). The previous
        # popitem()-based check read one arbitrary agent and destructively
        # emptied the terminated/truncated dicts in the process.
        dones = {"__all__": all(d.values()) or all(t.values())}
        return obs, rewards, dones, info

    def close(self):
        """Release the underlying PettingZoo env's resources."""
        self.env.close()

    def render(self, mode=None):
        # Slow rendering down slightly so it is watchable in real time.
        self.env.render()
        time.sleep(0.05)
        return True

    def get_env_info(self):
        """Return the metadata MARLlib needs to configure training."""
        env_info = {
            "space_obs": self.observation_space,
            "space_act": self.action_space,
            "num_agents": self.num_agents,
            # Honor a configured horizon; MPE's default max_cycles is 25.
            "episode_limit": self.env_config.get("max_cycles", 25),
            "policy_mapping_info": policy_mapping_dict
        }
        return env_info
1 change: 1 addition & 0 deletions marllib/envs/base_env/mpe.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
simple_spread_v2, simple_reference_v2, simple_world_comm_v2, simple_speaker_listener_v3
import time

# pettingzoo 1.12.0
REGISTRY = {}
REGISTRY["simple_adversary"] = simple_adversary_v2.parallel_env
REGISTRY["simple_crypto"] = simple_crypto_v2.parallel_env
Expand Down
7 changes: 7 additions & 0 deletions marllib/envs/global_reward_env/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@
except Exception as e:
COOP_ENV_REGISTRY["mpe"] = str(e)

# Register the fully-cooperative gymnasium MPE wrapper; on failure store the
# error text so the registry reports why the env is unavailable.
try:
    from marllib.envs.global_reward_env.gymnasium_mpe_fcoop import RLlibMPE_Gymnasium_FCOOP

    COOP_ENV_REGISTRY["gymnasium_mpe"] = RLlibMPE_Gymnasium_FCOOP
except Exception as err:
    COOP_ENV_REGISTRY["gymnasium_mpe"] = str(err)

try:
from marllib.envs.global_reward_env.magent_fcoop import RLlibMAgent_FCOOP

Expand Down
48 changes: 48 additions & 0 deletions marllib/envs/global_reward_env/gymnasium_mpe_fcoop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# MIT License

# Copyright (c) 2023 Replicable-MARL

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from marllib.envs.base_env.gymnasium_mpe import RLlibMPE_Gymnasium

# Only the fully-cooperative MPE scenarios make sense for shared-reward training.
legal_scenarios = ["simple_spread", "simple_reference", "simple_speaker_listener"]


class RLlibMPE_Gymnasium_FCOOP(RLlibMPE_Gymnasium):
    """Fully-cooperative MPE variant: every agent receives the team-mean reward."""

    def __init__(self, env_config):
        # Fail fast on competitive scenarios, where reward sharing is invalid.
        if env_config["map_name"] not in legal_scenarios:
            raise ValueError(
                "map_name must be one of: simple_spread, simple_reference, "
                "simple_speaker_listener")
        super().__init__(env_config)

    def step(self, action_dict):
        """Step the env, replacing individual rewards with the team average."""
        o, r, d, t, info = self.env.step(action_dict)
        team_reward = sum(r.values()) / self.num_agents
        obs = {agent: {"obs": o[agent]} for agent in self.agents}
        rewards = {agent: team_reward for agent in self.agents}
        # Episode ends when all agents are terminated or truncated. The
        # previous popitem()-based check inspected one arbitrary agent and
        # destructively emptied the terminated/truncated dicts.
        dones = {"__all__": all(d.values()) or all(t.values())}
        return obs, rewards, dones, info
2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/coma.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ algo_args:
lambda: 1.0
vf_loss_coeff: 1.0
batch_episode: 128
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"
lr: 0.0005
entropy_coeff: 0.01
2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/facmac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,6 @@ algo_args:
buffer_size_episode: 1000
target_network_update_freq_episode: 1
tau: 0.002
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"
mixer: "qmix" # qmix or vdn

2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/happo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,4 @@ algo_args:
entropy_coeff: 0.01
vf_clip_param: 10.0
min_lr_schedule: 1e-11
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"
2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/hatrpo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ algo_args:
vf_loss_coeff: 1.0
entropy_coeff: 0.01
vf_clip_param: 10.0
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"
kl_threshold: 0.00001
accept_ratio: 0.5
critic_lr: 0.0005
2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/ia2c.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ algo_args:
lambda: 1.0
vf_loss_coeff: 1.0
batch_episode: 128
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"
lr: 0.0005
entropy_coeff: 0.01
2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/iddpg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,5 @@ algo_args:
buffer_size_episode: 1000
target_network_update_freq_episode: 1
tau: 0.002
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"

2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/ippo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,5 @@ algo_args:
entropy_coeff: 0.01
clip_param: 0.3
vf_clip_param: 20.0
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"

2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/itrpo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ algo_args:
vf_loss_coeff: 1.0
entropy_coeff: 0.01
vf_clip_param: 10.0
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"
kl_threshold: 0.00001
accept_ratio: 0.5
critic_lr: 0.0005
4 changes: 2 additions & 2 deletions marllib/marl/algos/hyperparams/finetuned/mpe/maa2c.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ algo_args:
lambda: 1.0
vf_loss_coeff: 1.0
batch_episode: 128
batch_mode: "truncate_episodes"
lr: 0.0005
batch_mode: "complete_episodes"
lr: 0.0003
entropy_coeff: 0.01
2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/maddpg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,5 @@ algo_args:
buffer_size_episode: 10000
target_network_update_freq_episode: 1
tau: 0.002
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"

2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/mappo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,6 @@ algo_args:
entropy_coeff: 0.01
clip_param: 0.3
vf_clip_param: 20.0
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"


2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/matrpo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ algo_args:
vf_loss_coeff: 1.0
entropy_coeff: 0.01
vf_clip_param: 10.0
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"
kl_threshold: 0.00001
accept_ratio: 0.5
critic_lr: 0.0005
2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/vda2c.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ algo_args:
lambda: 1.0
vf_loss_coeff: 1.0
batch_episode: 128
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"
lr: 0.0005
entropy_coeff: 0.01
mixer: "qmix" # vdn
2 changes: 1 addition & 1 deletion marllib/marl/algos/hyperparams/finetuned/mpe/vdppo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,5 @@ algo_args:
entropy_coeff: 0.01
clip_param: 0.3
vf_clip_param: 20.0
batch_mode: "truncate_episodes"
batch_mode: "complete_episodes"
mixer: "qmix" # qmix or vdn

0 comments on commit d9d4c62

Please sign in to comment.