diff --git a/python/app/fedcv/image_segmentation/config/gpu_mapping.yaml b/python/app/fedcv/image_segmentation/config/gpu_mapping.yaml
index 1ba657af36..446c75e24a 100644
--- a/python/app/fedcv/image_segmentation/config/gpu_mapping.yaml
+++ b/python/app/fedcv/image_segmentation/config/gpu_mapping.yaml
@@ -1,4 +1,3 @@
-# Please check "GPU_MAPPING.md" to see how to define the topology
 # You can define a cluster containing multiple GPUs within multiple machines by defining `gpu_mapping.yaml` as follows:
 
 # config_cluster0:
diff --git a/python/app/fedcv/image_segmentation/config/simulation/gpu_mapping.yaml b/python/app/fedcv/image_segmentation/config/simulation/gpu_mapping.yaml
index 1ba657af36..446c75e24a 100644
--- a/python/app/fedcv/image_segmentation/config/simulation/gpu_mapping.yaml
+++ b/python/app/fedcv/image_segmentation/config/simulation/gpu_mapping.yaml
@@ -1,4 +1,3 @@
-# Please check "GPU_MAPPING.md" to see how to define the topology
 # You can define a cluster containing multiple GPUs within multiple machines by defining `gpu_mapping.yaml` as follows:
 
 # config_cluster0:
diff --git a/python/app/fedcv/object_detection/model/yolov5/requirements.txt b/python/app/fedcv/object_detection/model/yolov5/requirements.txt
index 4a4f68539c..ce68986a8c 100644
--- a/python/app/fedcv/object_detection/model/yolov5/requirements.txt
+++ b/python/app/fedcv/object_detection/model/yolov5/requirements.txt
@@ -12,7 +12,7 @@ scipy>=1.4.1  # Google Colab version
 torch>=1.7.0,!=1.12.0  # https://github.com/ultralytics/yolov5/issues/8395
 torchvision>=0.8.1,!=0.13.0 # https://github.com/ultralytics/yolov5/issues/8395
 tqdm>=4.41.0
-protobuf<4.21.3  # https://github.com/ultralytics/yolov5/issues/8012
+protobuf>=4.21.6  # https://github.com/ultralytics/yolov5/issues/8012
 
 # Logging -------------------------------------
 tensorboard>=2.4.1
diff --git a/python/app/fedcv/object_detection/model/yolov7/requirements.txt b/python/app/fedcv/object_detection/model/yolov7/requirements.txt
index 6b71f21689..ed8045a364 100644
--- a/python/app/fedcv/object_detection/model/yolov7/requirements.txt
+++ b/python/app/fedcv/object_detection/model/yolov7/requirements.txt
@@ -11,7 +11,7 @@ scipy>=1.4.1
 torch>=1.7.0,!=1.12.0
 torchvision>=0.8.1,!=0.13.0
 tqdm>=4.41.0
-protobuf<4.21.3
+protobuf>=4.21.6
 
 # Logging -------------------------------------
 tensorboard>=2.4.1
diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/README.md b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/README.md
new file mode 100644
index 0000000000..d125847dd6
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/README.md
@@ -0,0 +1,31 @@
+## Training Script
+
+At the client side, the client ID (a.k.a rank) starts from 1.
+Please also modify config/fedml_config.yaml, changing the `worker_num` the as the number of clients you plan to run.
+
+At the server side, run the following script:
+```
+bash run_server.sh your_run_id
+```
+
+For client 1, run the following script:
+```
+bash run_client.sh 1 your_run_id
+```
+For client 2, run the following script:
+```
+bash run_client.sh 2 your_run_id
+```
+Note: please run the server first.
+
+## A Better User-experience with FedML MLOps (open.fedml.ai)
+To reduce the difficulty and complexity of these CLI commands. We recommend you to use our MLOps (open.fedml.ai).
+FedML MLOps provides:
+- Install Client Agent and Login
+- Inviting Collaborators and group management
+- Project Management
+- Experiment Tracking (visualizing training results)
+- monitoring device status
+- visualizing system performance (including profiling flow chart)
+- distributed logging
+- model serving
\ No newline at end of file
diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/__init__.py b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/bootstrap.sh b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/bootstrap.sh
new file mode 100644
index 0000000000..1f0287a7b6
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/bootstrap.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+# pip install fedml==0.7.15
+#pip install --upgrade fedml
+
+### don't modify this part ###
+echo "[FedML]Bootstrap Finished"
+##############################
diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/foolsgold/fedml_config.yaml b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/foolsgold/fedml_config.yaml
new file mode 100644
index 0000000000..2c8a9001b9
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/foolsgold/fedml_config.yaml
@@ -0,0 +1,65 @@
+common_args:
+  training_type: "cross_silo"
+  scenario: "horizontal"
+  using_mlops: false
+  random_seed: 0
+  config_version: release
+
+environment_args:
+  bootstrap: config/bootstrap.sh
+
+data_args:
+  dataset: "cifar10"
+  data_cache_dir: ~/fedml_data
+  partition_method: "homo"
+  partition_alpha: 0.5
+
+model_args:
+  model: "resnet56"
+  model_file_cache_folder: "./model_file_cache" # will be filled by the server automatically
+  global_model_file_path: "./model_file_cache/global_model.pt"
+
+train_args:
+  federated_optimizer: "FedAvg"
+  # for CLI running, this can be None; in MLOps deployment, `client_id_list` will be replaced with real-time selected devices
+  client_id_list:
+  # for FoolsGold Defense, if use_memory is true, then client_num_in_total should be equal to client_number_per_round
+  client_num_in_total: 8
+  client_num_per_round: 8
+  comm_round: 10
+  epochs: 1
+  batch_size: 10
+  client_optimizer: sgd
+  learning_rate: 0.03
+  weight_decay: 0.001
+
+validation_args:
+  frequency_of_the_test: 1
+
+device_args:
+  worker_num: 8
+  using_gpu: true
+  gpu_mapping_file: config/foolsgold/gpu_mapping.yaml
+  gpu_mapping_key: mapping_default
+
+comm_args:
+  backend: "MPI"
+
+tracking_args:
+  # the default log path is at ~/fedml-client/fedml/logs/ and ~/fedml-server/fedml/logs/
+  enable_wandb: false
+  wandb_key: ee0b5f53d949c84cee7decbe7a629e63fb2f8408
+  wandb_project: fedml
+  wandb_name: fedml_torch_fedavg_mnist_lr
+
+
+attack_args:
+  enable_attack: true
+  attack_type: byzantine
+  attack_mode: random
+  byzantine_client_num: 1
+
+# for FoolsGold Defense, if use_memory is true, then client_num_in_total should be equal to client_number_per_round
+defense_args:
+  enable_defense: true
+  defense_type: foolsgold
diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/foolsgold/gpu_mapping.yaml b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/foolsgold/gpu_mapping.yaml
new file mode 100644
index 0000000000..79a9a634b1
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/foolsgold/gpu_mapping.yaml
@@ -0,0 +1,3 @@
+# this is used for 4 clients and 1 server training within a single machine which has 8 GPUs, but you hope to skip the GPU device ID.
+mapping_default:
+  host1: [3, 2, 2, 2]  # assume we only have 4 GPUs
\ No newline at end of file
diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/gpu_mapping.yaml b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/gpu_mapping.yaml
new file mode 100644
index 0000000000..446c75e24a
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/gpu_mapping.yaml
@@ -0,0 +1,60 @@
+# You can define a cluster containing multiple GPUs within multiple machines by defining `gpu_mapping.yaml` as follows:
+
+# config_cluster0:
+#     host_name_node0: [num_of_processes_on_GPU0, num_of_processes_on_GPU1, num_of_processes_on_GPU2, num_of_processes_on_GPU3, ..., num_of_processes_on_GPU_n]
+#     host_name_node1: [num_of_processes_on_GPU0, num_of_processes_on_GPU1, num_of_processes_on_GPU2, num_of_processes_on_GPU3, ..., num_of_processes_on_GPU_n]
+#     host_name_node_m: [num_of_processes_on_GPU0, num_of_processes_on_GPU1, num_of_processes_on_GPU2, num_of_processes_on_GPU3, ..., num_of_processes_on_GPU_n]
+
+
+# this is used for 10 clients and 1 server training within a single machine which has 4 GPUs
+mapping_default:
+    ChaoyangHe-GPU-RTX2080Tix4: [3, 3, 3, 2]
+
+# this is used for 4 clients and 1 server training within a single machine which has 4 GPUs
+mapping_config1_5:
+    host1: [2, 1, 1, 1]
+
+# this is used for 10 clients and 1 server training within a single machine which has 4 GPUs
+mapping_config2_11:
+    host1: [3, 3, 3, 2]
+
+# this is used for 10 clients and 1 server training within a single machine which has 8 GPUs
+mapping_config3_11:
+    host1: [2, 2, 2, 1, 1, 1, 1, 1]
+
+# this is used for 4 clients and 1 server training within a single machine which has 8 GPUs, but you hope to skip the GPU device ID.
+mapping_config4_5:
+    host1: [1, 0, 0, 1, 1, 0, 1, 1]
+
+# this is used for 4 clients and 1 server training using 6 machines, each machine has 2 GPUs inside, but you hope to use the second GPU.
+mapping_config5_6:
+    host1: [0, 1]
+    host2: [0, 1]
+    host3: [0, 1]
+    host4: [0, 1]
+    host5: [0, 1]
+# this is used for 4 clients and 1 server training using 2 machines, each machine has 2 GPUs inside, but you hope to use the second GPU.
+mapping_config5_2:
+    gpu-worker2: [1,1]
+    gpu-worker1: [2,1]
+
+# this is used for 10 clients and 1 server training using 4 machines, each machine has 2 GPUs inside, but you hope to use the second GPU.
+mapping_config5_4:
+    gpu-worker2: [1,1]
+    gpu-worker1: [2,1]
+    gpu-worker3: [3,1]
+    gpu-worker4: [1,1]
+
+# for grpc GPU mapping
+mapping_FedML_gRPC:
+    hostname_node_server: [1]
+    hostname_node_1: [1, 0, 0, 0]
+    hostname_node_2: [1, 0, 0, 0]
+
+# for torch RPC GPU mapping
+mapping_FedML_tRPC:
+    lambda-server1: [0, 0, 0, 0, 2, 2, 1, 1]
+    lambda-server2: [2, 1, 1, 1, 0, 0, 0, 0]
+
+#mapping_FedML_tRPC:
+#    lambda-server1: [0, 0, 0, 0, 3, 3, 3, 2]
\ No newline at end of file
diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/run_client.sh b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/run_client.sh
new file mode 100644
index 0000000000..82f8c2dbc6
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/run_client.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+RANK=$1
+RUN_ID=$2
+python3 torch_client.py --cf config/foolsgold/fedml_config.yaml --rank $RANK --role client --run_id $RUN_ID
\ No newline at end of file
diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/run_mpi.sh b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/run_mpi.sh
new file mode 100644
index 0000000000..ca02e299c0
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/run_mpi.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+
+WORKER_NUM=$1
+
+PROCESS_NUM=`expr $WORKER_NUM + 1`
+echo $PROCESS_NUM
+
+hostname > mpi_host_file
+
+mpirun -np $PROCESS_NUM \
+-hostfile mpi_host_file \
+python torch_mpi.py --cf config/foolsgold/fedml_config.yaml
\ No newline at end of file
diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/run_server.sh b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/run_server.sh
new file mode 100644
index 0000000000..2714f440f5
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/run_server.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+RUN_ID=$1
+python3 torch_server.py --cf config/foolsgold/fedml_config.yaml --rank 0 --role server --run_id $RUN_ID
\ No newline at end of file
diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/torch_client.py b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/torch_client.py
new file mode 100644
index 0000000000..0d507a94cb
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/torch_client.py
@@ -0,0 +1,31 @@
+import logging
+
+import fedml
+from fedml import FedMLRunner
+from fedml.model.cv.resnet import resnet56
+
+
+def create_model():
+    # please download the pre-trained weight file from
+    # https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar10_resnet44-2a3cabcb.pt
+    pre_trained_model_path = "./config/resnet56_on_cifar10.pth"
+    model = resnet56(10, pretrained=True, path=pre_trained_model_path)
+    logging.info("load pretrained model successfully")
+    return model
+
+
+if __name__ == "__main__":
+    args = fedml.init()
+
+    # init device
+    device = fedml.device.get_device(args)
+
+    # load data
+    dataset, output_dim = fedml.data.load(args)
+
+    # load model
+    model = create_model()
+
+    # start training
+    fedml_runner = FedMLRunner(args, device, dataset, model)
+    fedml_runner.run()
diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/torch_mpi.py b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/torch_mpi.py
new file mode 100644
index 0000000000..16e1939a95
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/torch_mpi.py
@@ -0,0 +1,34 @@
+import logging
+import fedml
+from fedml import FedMLRunner
+from fedml.model.cv.resnet import resnet56
+
+
+def create_model():
+    """
+    load pretrained model...
+    please download the pre-trained weight file from
+    https://github.com/FedML-AI/FedML/blob/fedml_v0.6_before_fundraising/fedml_api/model/cv/pretrained/CIFAR10/resnet56/best.pth
+    and rename the file to ``resnet56_on_cifar10.pth''
+    """
+    pre_trained_model_path = "./config/resnet56_on_cifar10.pth"
+    model = resnet56(10, pretrained=True, path=pre_trained_model_path)
+    logging.info("load pretrained model successfully")
+    return model
+
+
+if __name__ == "__main__":
+    args = fedml.init()
+
+    # init device
+    device = fedml.device.get_device(args)
+
+    # load data
+    dataset, output_dim = fedml.data.load(args)
+
+    # load model
+    model = fedml.model.create(args, output_dim)
+
+    # start training
+    fedml_runner = FedMLRunner(args, device, dataset, model)
+    fedml_runner.run()
diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/torch_server.py b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/torch_server.py
new file mode 100644
index 0000000000..0d507a94cb
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/torch_server.py
@@ -0,0 +1,31 @@
+import logging
+
+import fedml
+from fedml import FedMLRunner
+from fedml.model.cv.resnet import resnet56
+
+
+def create_model():
+    # please download the pre-trained weight file from
+    # https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar10_resnet44-2a3cabcb.pt
+    pre_trained_model_path = "./config/resnet56_on_cifar10.pth"
+    model = resnet56(10, pretrained=True, path=pre_trained_model_path)
+    logging.info("load pretrained model successfully")
+    return model
+
+
+if __name__ == "__main__":
+    args = fedml.init()
+
+    # init device
+    device = fedml.device.get_device(args)
+
+    # load data
+    dataset, output_dim = fedml.data.load(args)
+
+    # load model
+    model = create_model()
+
+    # start training
+    fedml_runner = FedMLRunner(args, device, dataset, model)
+    fedml_runner.run()
diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/config/foolsgold/fedml_config.yaml b/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/config/foolsgold/fedml_config.yaml
index 7d87ad567f..ee707b773a 100644
--- a/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/config/foolsgold/fedml_config.yaml
+++ b/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/config/foolsgold/fedml_config.yaml
@@ -9,13 +9,13 @@ environment_args:
   bootstrap: config/bootstrap.sh
 
 data_args:
-  dataset: "cifar10"
+  dataset: "mnist"
   data_cache_dir: ~/fedml_data
   partition_method: "homo"
   partition_alpha: 0.5
 
 model_args:
-  model: "resnet56"
+  model: "lr"
   model_file_cache_folder: "./model_file_cache" # will be filled by the server automatically
   global_model_file_path: "./model_file_cache/global_model.pt"
 
@@ -24,8 +24,8 @@ train_args:
   # for CLI running, this can be None; in MLOps deployment, `client_id_list` will be replaced with real-time selected devices
   client_id_list:
   # for FoolsGold Defense, if use_memory is true, then client_num_in_total should be equal to client_number_per_round
-  client_num_in_total: 8
-  client_num_per_round: 8
+  client_num_in_total: 4
+  client_num_per_round: 4
   comm_round: 10
   epochs: 1
   batch_size: 10
@@ -37,13 +37,13 @@ validation_args:
   frequency_of_the_test: 1
 
 device_args:
-  worker_num: 8
-  using_gpu: true
+  worker_num: 4
+  using_gpu: false
   gpu_mapping_file: config/foolsgold/gpu_mapping.yaml
   gpu_mapping_key: mapping_default
 
 comm_args:
-  backend: "MPI"
+  backend: "MQTT_S3"
 
 tracking_args:
   # the default log path is at ~/fedml-client/fedml/logs/ and ~/fedml-server/fedml/logs/
diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/config/resnet56_on_cifar10.pth b/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/config/resnet56_on_cifar10.pth
new file mode 100644
index 0000000000..d16cb52209
Binary files /dev/null and b/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/config/resnet56_on_cifar10.pth differ
diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/torch_client.py b/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/torch_client.py
index 0d507a94cb..b4bd2b9db8 100644
--- a/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/torch_client.py
+++ b/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/torch_client.py
@@ -1,13 +1,16 @@
 import logging
-
 import fedml
 from fedml import FedMLRunner
 from fedml.model.cv.resnet import resnet56
 
 
 def create_model():
-    # please download the pre-trained weight file from
-    # https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar10_resnet44-2a3cabcb.pt
+    """
+    loading pretrained model...
+    please download the pre-trained weight file from
+    https://github.com/FedML-AI/FedML/blob/fedml_v0.6_before_fundraising/fedml_api/model/cv/pretrained/CIFAR10/resnet56/best.pth
+    and rename the file to ``resnet56_on_cifar10.pth''
+    """
     pre_trained_model_path = "./config/resnet56_on_cifar10.pth"
     model = resnet56(10, pretrained=True, path=pre_trained_model_path)
     logging.info("load pretrained model successfully")
@@ -24,7 +27,7 @@ def create_model():
     dataset, output_dim = fedml.data.load(args)
 
     # load model
-    model = create_model()
+    model = fedml.model.create(args, output_dim)
 
     # start training
     fedml_runner = FedMLRunner(args, device, dataset, model)
diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/torch_mpi.py b/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/torch_mpi.py
index 0d507a94cb..b4bd2b9db8 100644
--- a/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/torch_mpi.py
+++ b/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/torch_mpi.py
@@ -1,13 +1,16 @@
 import logging
-
 import fedml
 from fedml import FedMLRunner
 from fedml.model.cv.resnet import resnet56
 
 
 def create_model():
-    # please download the pre-trained weight file from
-    # https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar10_resnet44-2a3cabcb.pt
+    """
+    loading pretrained model...
+    please download the pre-trained weight file from
+    https://github.com/FedML-AI/FedML/blob/fedml_v0.6_before_fundraising/fedml_api/model/cv/pretrained/CIFAR10/resnet56/best.pth
+    and rename the file to ``resnet56_on_cifar10.pth''
+    """
     pre_trained_model_path = "./config/resnet56_on_cifar10.pth"
     model = resnet56(10, pretrained=True, path=pre_trained_model_path)
     logging.info("load pretrained model successfully")
@@ -24,7 +27,7 @@ def create_model():
     dataset, output_dim = fedml.data.load(args)
 
     # load model
-    model = create_model()
+    model = fedml.model.create(args, output_dim)
 
     # start training
     fedml_runner = FedMLRunner(args, device, dataset, model)
diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/torch_server.py b/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/torch_server.py
index 0d507a94cb..b4bd2b9db8 100644
--- a/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/torch_server.py
+++ b/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/torch_server.py
@@ -1,13 +1,16 @@
 import logging
-
 import fedml
 from fedml import FedMLRunner
 from fedml.model.cv.resnet import resnet56
 
 
 def create_model():
-    # please download the pre-trained weight file from
-    # https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar10_resnet44-2a3cabcb.pt
+    """
+    loading pretrained model...
+    please download the pre-trained weight file from
+    https://github.com/FedML-AI/FedML/blob/fedml_v0.6_before_fundraising/fedml_api/model/cv/pretrained/CIFAR10/resnet56/best.pth
+    and rename the file to ``resnet56_on_cifar10.pth''
+    """
     pre_trained_model_path = "./config/resnet56_on_cifar10.pth"
     model = resnet56(10, pretrained=True, path=pre_trained_model_path)
     logging.info("load pretrained model successfully")
@@ -24,7 +27,7 @@ def create_model():
     dataset, output_dim = fedml.data.load(args)
 
     # load model
-    model = create_model()
+    model = fedml.model.create(args, output_dim)
 
     # start training
     fedml_runner = FedMLRunner(args, device, dataset, model)
diff --git a/python/examples/simulation/mpi_torch_async_fedavg/config/gpu_mapping.yaml b/python/examples/simulation/mpi_torch_async_fedavg/config/gpu_mapping.yaml
index 1ba657af36..446c75e24a 100644
--- a/python/examples/simulation/mpi_torch_async_fedavg/config/gpu_mapping.yaml
+++ b/python/examples/simulation/mpi_torch_async_fedavg/config/gpu_mapping.yaml
@@ -1,4 +1,3 @@
-# Please check "GPU_MAPPING.md" to see how to define the topology
 # You can define a cluster containing multiple GPUs within multiple machines by defining `gpu_mapping.yaml` as follows:
 
 # config_cluster0:
diff --git a/python/examples/simulation/mpi_torch_fedavg/config/gpu_mapping.yaml b/python/examples/simulation/mpi_torch_fedavg/config/gpu_mapping.yaml
index 1ba657af36..446c75e24a 100644
--- a/python/examples/simulation/mpi_torch_fedavg/config/gpu_mapping.yaml
+++ b/python/examples/simulation/mpi_torch_fedavg/config/gpu_mapping.yaml
@@ -1,4 +1,3 @@
-# Please check "GPU_MAPPING.md" to see how to define the topology
 # You can define a cluster containing multiple GPUs within multiple machines by defining `gpu_mapping.yaml` as follows:
 
 # config_cluster0:
diff --git a/python/fedml/__init__.py b/python/fedml/__init__.py
index e38e1e2dc9..87fc4aa25b 100644
--- a/python/fedml/__init__.py
+++ b/python/fedml/__init__.py
@@ -23,7 +23,7 @@
 _global_training_type = None
 _global_comm_backend = None
 
-__version__ = "0.7.327"
+__version__ = "0.7.330"
 
 
 def init(args=None):
diff --git a/python/fedml/cli/env/collect_env.py b/python/fedml/cli/env/collect_env.py
index 19e7295eda..12775413e3 100644
--- a/python/fedml/cli/env/collect_env.py
+++ b/python/fedml/cli/env/collect_env.py
@@ -64,5 +64,17 @@ def collect_env():
             )
         )
         nvidia_smi.nvmlShutdown()
+
+        import torch
+
+        torch_is_available = torch.cuda.is_available()
+        print("torch_is_available = {}".format(torch_is_available))
+
+        device_count = torch.cuda.device_count()
+        print("device_count = {}".format(device_count))
+
+        device_name = torch.cuda.get_device_name(0)
+        print("device_name = {}".format(device_name))
+
     except:
         print("No GPU devices")
diff --git a/python/fedml/core/alg_frame/client_trainer.py b/python/fedml/core/alg_frame/client_trainer.py
index 053636a53d..513b34e67c 100644
--- a/python/fedml/core/alg_frame/client_trainer.py
+++ b/python/fedml/core/alg_frame/client_trainer.py
@@ -18,11 +18,19 @@ def __init__(self, model, args):
         self.model = model
         self.id = 0
         self.args = args
+        self.local_train_dataset = None
+        self.local_test_dataset = None
+        self.local_sample_number = 0
         FedMLDifferentialPrivacy.get_instance().init(args)
 
     def set_id(self, trainer_id):
         self.id = trainer_id
 
+    def update_dataset(self, local_train_dataset, local_test_dataset, local_sample_number):
+        self.local_train_dataset = local_train_dataset
+        self.local_test_dataset = local_test_dataset
+        self.local_sample_number = local_sample_number
+
     @abstractmethod
     def get_model_params(self):
         pass
diff --git a/python/fedml/core/alg_frame/server_aggregator.py b/python/fedml/core/alg_frame/server_aggregator.py
index fa59102efa..1db93002fb 100644
--- a/python/fedml/core/alg_frame/server_aggregator.py
+++ b/python/fedml/core/alg_frame/server_aggregator.py
@@ -38,7 +38,7 @@ def on_before_aggregation(
         if FedMLAttacker.get_instance().is_model_attack():
             raw_client_model_or_grad_list = FedMLAttacker.get_instance().attack_model(
                 raw_client_grad_list=raw_client_model_or_grad_list,
-                extra_auxiliary_info=None,
+                extra_auxiliary_info=self.get_model_params(),
             )
         if FedMLDefender.get_instance().is_defense_enabled():
             raw_client_model_or_grad_list = FedMLDefender.get_instance().defend_before_aggregation(
diff --git a/python/fedml/core/security/attack/byzantine_attack.py b/python/fedml/core/security/attack/byzantine_attack.py
index 3a83cac758..0dd72d1dce 100644
--- a/python/fedml/core/security/attack/byzantine_attack.py
+++ b/python/fedml/core/security/attack/byzantine_attack.py
@@ -1,4 +1,5 @@
 import random
+import fedml
 import numpy as np
 import torch
 from .attack_base import BaseAttackMethod
@@ -14,25 +15,26 @@ class ByzantineAttack(BaseAttackMethod):
     def __init__(self, args):
         self.byzantine_client_num = args.byzantine_client_num
         self.attack_mode = args.attack_mode  # random: randomly generate a weight; zero: set the weight to 0
+        self.device = fedml.device.get_device(args)
 
     def attack_model(self, raw_client_grad_list: List[Tuple[float, Dict]],
         extra_auxiliary_info: Any = None):
         if len(raw_client_grad_list) < self.byzantine_client_num:
             self.byzantine_client_num = len(raw_client_grad_list)
+        byzantine_idxs = self._get_malicious_client_idx(len(raw_client_grad_list))
+        print(f"byzantine_idxs={byzantine_idxs}")
         if self.attack_mode == "zero":
-            byzantine_local_w = self._attack_zero_mode(raw_client_grad_list)
+            byzantine_local_w = self._attack_zero_mode(raw_client_grad_list, byzantine_idxs)
         elif self.attack_mode == "random":
-            byzantine_local_w = self._attack_random_mode(raw_client_grad_list)
+            byzantine_local_w = self._attack_random_mode(raw_client_grad_list, byzantine_idxs)
         elif self.attack_mode == "flip":
-            byzantine_local_w = self._attack_flip_mode(raw_client_grad_list, extra_auxiliary_info) # extra_auxiliary_info: global model
+            byzantine_local_w = self._attack_flip_mode(raw_client_grad_list, byzantine_idxs, extra_auxiliary_info) # extra_auxiliary_info: global model
         else:
             raise NotImplementedError("Method not implemented!")
         return byzantine_local_w
 
-    def _attack_zero_mode(self, model_list):
-        byzantine_idxs = self._get_malicious_client_idx(len(model_list))
+    def _attack_zero_mode(self, model_list, byzantine_idxs):
         new_model_list = []
-
         for i in range(0, len(model_list)):
             if i not in byzantine_idxs:
                 new_model_list.append(model_list[i])
@@ -40,12 +42,11 @@ def _attack_zero_mode(self, model_list):
                 local_sample_number, local_model_params = model_list[i]
                 for k in local_model_params.keys():
                     if is_weight_param(k):
-                        local_model_params[k] = torch.from_numpy(np.zeros(local_model_params[k].size())).float()
+                        local_model_params[k] = torch.from_numpy(np.zeros(local_model_params[k].size())).float().to(self.device)
                 new_model_list.append((local_sample_number, local_model_params))
         return new_model_list
 
-    def _attack_random_mode(self, model_list):
-        byzantine_idxs = self._get_malicious_client_idx(len(model_list))
+    def _attack_random_mode(self, model_list, byzantine_idxs):
         new_model_list = []
 
         for i in range(0, len(model_list)):
@@ -55,13 +56,12 @@ def _attack_random_mode(self, model_list):
                 local_sample_number, local_model_params = model_list[i]
                 for k in local_model_params.keys():
                     if is_weight_param(k):
-                        local_model_params[k] = torch.from_numpy(np.random.random_sample(size=local_model_params[k].size())).float()
+                        local_model_params[k] = torch.from_numpy(np.random.random_sample(size=local_model_params[k].size())).float().to(self.device)
                 new_model_list.append((local_sample_number, local_model_params))
         return new_model_list
 
 
-    def _attack_flip_mode(self, model_list, global_model):
-        byzantine_idxs = self._get_malicious_client_idx(len(model_list))
+    def _attack_flip_mode(self, model_list, byzantine_idxs, global_model):
         new_model_list = []
         for i in range(0, len(model_list)):
             if i not in byzantine_idxs:
diff --git a/python/fedml/core/security/constants.py b/python/fedml/core/security/constants.py
index e199787223..963806dd34 100644
--- a/python/fedml/core/security/constants.py
+++ b/python/fedml/core/security/constants.py
@@ -10,6 +10,7 @@
 DEFENSE_FOOLSGOLD = "foolsgold"
 DEFENSE_CRFL = "crfl"
 DEFENSE_THREESIGMA = "3sigma"
+DEFENSE_THREESIGMA_GEOMEDIAN = "3sigma_geo"
 DEFENSE_MULTIKRUM = "multikrum"
 DEFENSE_TRIMMED_MEAN = "trimmed_mean"
 
diff --git a/python/fedml/core/security/defense/foolsgold_defense.py b/python/fedml/core/security/defense/foolsgold_defense.py
index 6dc109cf98..12c99ac6ea 100644
--- a/python/fedml/core/security/defense/foolsgold_defense.py
+++ b/python/fedml/core/security/defense/foolsgold_defense.py
@@ -1,8 +1,6 @@
 from typing import Callable, List, Tuple, Dict, Any
-
 import numpy as np
 from scipy import spatial
-
 from .defense_base import BaseDefenseMethod
 
 """
@@ -35,7 +33,7 @@ def defend_before_aggregation(
     ):
         client_num = len(raw_client_grad_list)
         importance_feature_list = self._get_importance_feature(raw_client_grad_list)
-        print(len(importance_feature_list))
+        # print(len(importance_feature_list))
 
         if self.memory is None:
             self.memory = importance_feature_list
@@ -78,7 +76,7 @@ def fools_gold_score(feature_vec_list):
         alpha[alpha <= 0.0] = 1e-15
 
         # Rescale so that max value is alpha
-        print(np.max(alpha))
+        # print(np.max(alpha))
         alpha = alpha / np.max(alpha)
         alpha[(alpha == 1.0)] = 0.999999
 
@@ -87,8 +85,6 @@ def fools_gold_score(feature_vec_list):
         alpha[(np.isinf(alpha) + alpha > 1)] = 1
         alpha[(alpha < 0)] = 0
 
-        print("alpha = {}".format(alpha))
-
         return alpha
 
     def _get_importance_feature(self, raw_client_grad_list):
@@ -100,7 +96,7 @@ def _get_importance_feature(self, raw_client_grad_list):
 
             # Get last key-value tuple
             (weight_name, importance_feature) = list(grads.items())[-2]
-            print(importance_feature)
+            # print(importance_feature)
             feature_len = np.array(importance_feature.cpu().data.detach().numpy().shape).prod()
             feature_vector = np.reshape(importance_feature.cpu().data.detach().numpy(), feature_len)
             ret_feature_vector_list.append(feature_vector)
diff --git a/python/fedml/core/security/defense/three_sigma_defense.py b/python/fedml/core/security/defense/three_sigma_defense.py
index e4dca0b89e..fd05d243f7 100644
--- a/python/fedml/core/security/defense/three_sigma_defense.py
+++ b/python/fedml/core/security/defense/three_sigma_defense.py
@@ -54,7 +54,7 @@ def __init__(self, config):
         ):
             self.pretraining_round_number = config.pretraining_round_num
         else:
-            self.pretraining_round_number = 5
+            self.pretraining_round_number = 2
         # ----------------- params for normal distribution ----------------- #
         self.mu = 0
         self.sigma = 0
diff --git a/python/fedml/core/security/defense/three_sigma_geomedian_defense.py b/python/fedml/core/security/defense/three_sigma_geomedian_defense.py
new file mode 100644
index 0000000000..57e85dc3fb
--- /dev/null
+++ b/python/fedml/core/security/defense/three_sigma_geomedian_defense.py
@@ -0,0 +1,193 @@
+import math
+import numpy as np
+from .defense_base import BaseDefenseMethod
+from typing import Callable, List, Tuple, Dict, Any
+from scipy import spatial
+from ..common.bucket import Bucket
+from ..common.utils import compute_geometric_median, compute_euclidean_distance
+import torch
+
+
+class ThreeSigmaGeoMedianDefense(BaseDefenseMethod):
+    def __init__(self, config):
+        self.memory = None
+        self.iteration_num = 1
+        self.score_list = []
+        self.geo_median = None
+
+        if hasattr(config, "bucketing_batch_size") and isinstance(config.bucketing_batch_size, int):
+            self.bucketing_batch_size = config.bucketing_batch_size
+        else:
+            self.bucketing_batch_size = 1
+        if hasattr(config, "pretraining_round_num") and isinstance(
+            config.pretraining_round_num, int
+        ):
+            self.pretraining_round_number = config.pretraining_round_num
+        else:
+            self.pretraining_round_number = 2
+        # ----------------- params for normal distribution ----------------- #
+        self.mu = 0
+        self.sigma = 0
+        self.upper_bound = 0
+        self.lower_bound = 0
+        self.bound_param = 1  # values outside mu +- sigma are outliers
+
+        if hasattr(config, "to_keep_higher_scores") and isinstance(config.to_keep_higher_scores, bool):
+            self.to_keep_higher_scores = config.to_keep_higher_scores
+        else:
+            self.to_keep_higher_scores = False  # true or false, depending on the score algo
+        self.score_function = "l2"
+
+    def run(
+        self,
+        raw_client_grad_list: List[Tuple[float, Dict]],
+        base_aggregation_func: Callable = None,
+        extra_auxiliary_info: Any = None,
+    ):
+        grad_list = self.defend_before_aggregation(
+            raw_client_grad_list, extra_auxiliary_info
+        )
+        return self.defend_on_aggregation(
+            grad_list, base_aggregation_func, extra_auxiliary_info
+        )
+
+    def defend_before_aggregation(
+        self,
+        raw_client_grad_list: List[Tuple[float, Dict]],
+        extra_auxiliary_info: Any = None,
+    ):
+        # grad_list = [grad for (_, grad) in raw_client_grad_list]
+        client_scores = self.compute_client_scores(raw_client_grad_list)
+        print(f"client scores = {client_scores}")
+        if self.iteration_num < self.pretraining_round_number:
+            self.score_list.extend(list(client_scores))
+            self.mu, self.sigma = self.compute_gaussian_distribution()
+            # if self.mu + self.bound_param * self.sigma >= 0:
+            self.upper_bound = self.mu + self.bound_param * self.sigma
+            # if self.mu - self.bound_param * self.sigma <= 0:
+            self.lower_bound = self.mu - self.bound_param * self.sigma
+            self.iteration_num += 1
+
+        for i in range(
+            len(client_scores) - 1, -1, -1
+        ):  # traverse the score list in a reversed order
+            if (
+                not self.to_keep_higher_scores and client_scores[i] > self.upper_bound
+            ) or (self.to_keep_higher_scores and client_scores[i] < self.lower_bound):
+                # here we do not remove the score in self.score_list to avoid mis-deleting
+                # due to severe non-iid among clients
+                raw_client_grad_list.pop(i)
+                print(f"pop -- i = {i}")
+        batch_grad_list = Bucket.bucketization(
+            raw_client_grad_list, self.bucketing_batch_size
+        )
+        return batch_grad_list
+
+    # def defend_on_aggregation(
+    #     self,
+    #     raw_client_grad_list: List[Tuple[float, Dict]],
+    #     base_aggregation_func: Callable = None,
+    #     extra_auxiliary_info: Any = None,
+    # ):  # raw_client_grad_list: batch_grad_list
+    #     # ----------- geometric median part, or just use base_aggregation_func -------------
+    #     # todo: why geometric median? what about other approaches?
+    #     (num0, avg_params) = raw_client_grad_list[0]
+    #     alphas = {alpha for (alpha, params) in raw_client_grad_list}
+    #     alphas = {alpha / sum(alphas, 0.0) for alpha in alphas}
+    #     for k in avg_params.keys():
+    #         batch_grads = [params[k] for (alpha, params) in raw_client_grad_list]
+    #         avg_params[k] = compute_geometric_median(alphas, batch_grads)
+    #     return avg_params
+
+    def compute_gaussian_distribution(self):
+        n = len(self.score_list)
+        mu = sum(list(self.score_list)) / n
+        temp = 0
+
+        for i in range(len(self.score_list)):
+            temp = (((self.score_list[i] - mu) ** 2) / (n - 1)) + temp
+        sigma = math.sqrt(temp)
+        print(f"mu = {mu}, sigma = {sigma}")
+        return mu, sigma
+
+    def compute_client_scores(self, raw_client_grad_list):
+        importance_feature_list = self._get_importance_feature(raw_client_grad_list)
+        if self.score_function == "foolsgold":
+            if self.memory is None:
+                self.memory = importance_feature_list
+            else:  # memory: potential bugs: grads in different iterations may be from different clients
+                for i in range(len(raw_client_grad_list)):
+                    self.memory[i] += importance_feature_list[i]
+            return self.fools_gold_score(self.memory)
+        if self.score_function == "l2":
+            if self.geo_median is None:
+                # (num0, avg_params) = raw_client_grad_list[0]
+                # alphas = {alpha for (alpha, params) in raw_client_grad_list}
+                # alphas = {alpha / sum(alphas, 0.0) for alpha in alphas}
+                alphas = [1/len(raw_client_grad_list)] * len(raw_client_grad_list)
+                self.geo_median = compute_geometric_median(alphas, importance_feature_list)
+            return self.l2_scores(importance_feature_list)
+
+    def l2_scores(self, importance_feature_list):
+        scores = []
+        for feature in importance_feature_list:
+            score = compute_euclidean_distance(torch.Tensor(feature), self.geo_median)
+            scores.append(score)
+        return scores
+
+
+    def _get_importance_feature(self, raw_client_grad_list):
+        # print(f"raw_client_grad_list = {raw_client_grad_list}")
+        # Foolsgold uses the last layer's gradient/weights as the importance feature.
+        ret_feature_vector_list = []
+        for idx in range(len(raw_client_grad_list)):
+            raw_grad = raw_client_grad_list[idx]
+            (p, grads) = raw_grad
+
+            # Get last key-value tuple
+            (weight_name, importance_feature) = list(grads.items())[-2]
+            # print(importance_feature)
+            feature_len = np.array(
+                importance_feature.cpu().data.detach().numpy().shape
+            ).prod()
+            feature_vector = np.reshape(
+                importance_feature.cpu().data.detach().numpy(), feature_len
+            )
+            ret_feature_vector_list.append(feature_vector)
+        return ret_feature_vector_list
+
+    @staticmethod
+    def fools_gold_score(feature_vec_list):
+        n_clients = len(feature_vec_list)
+        cs = np.zeros((n_clients, n_clients))
+        for i in range(n_clients):
+            for j in range(n_clients):
+                cs[i][j] = 1 - spatial.distance.cosine(
+                    feature_vec_list[i], feature_vec_list[j]
+                )
+        cs -= np.eye(n_clients)
+        maxcs = np.max(cs, axis=1)
+        # pardoning
+        for i in range(n_clients):
+            for j in range(n_clients):
+                if i == j:
+                    continue
+                if maxcs[i] < maxcs[j]:
+                    cs[i][j] = cs[i][j] * maxcs[i] / maxcs[j]
+        alpha = 1 - (np.max(cs, axis=1))
+        alpha[alpha > 1.0] = 1.0
+        alpha[alpha <= 0.0] = 1e-15
+
+        # Rescale so that max value is alpha
+         # print(np.max(alpha))
+        alpha = alpha / np.max(alpha)
+        alpha[(alpha == 1.0)] = 0.999999
+
+        # Logit function
+        alpha = np.log(alpha / (1 - alpha)) + 0.5
+        # alpha[(np.isinf(alpha) + alpha > 1)] = 1
+        # alpha[(alpha < 0)] = 0
+
+        print("alpha = {}".format(alpha))
+
+        return alpha
\ No newline at end of file
diff --git a/python/fedml/core/security/defense/three_sigma_krum_defense.py b/python/fedml/core/security/defense/three_sigma_krum_defense.py
new file mode 100644
index 0000000000..ae64eeacb1
--- /dev/null
+++ b/python/fedml/core/security/defense/three_sigma_krum_defense.py
@@ -0,0 +1,166 @@
+import math
+import numpy as np
+from .defense_base import BaseDefenseMethod
+from typing import Callable, List, Tuple, Dict, Any
+from ..common.bucket import Bucket
+from ..common.utils import compute_euclidean_distance, compute_middle_point
+import torch
+
+
+class ThreeSigmaKrumDefense(BaseDefenseMethod):
+    def __init__(self, config):
+        self.memory = None
+        self.iteration_num = 1
+        self.score_list = []
+        self.median = None
+
+        if hasattr(config, "bucketing_batch_size") and isinstance(
+            config.bucketing_batch_size, int
+        ):
+            self.bucketing_batch_size = config.bucketing_batch_size
+        else:
+            self.bucketing_batch_size = 1
+        if hasattr(config, "pretraining_round_num") and isinstance(
+            config.pretraining_round_num, int
+        ):
+            self.pretraining_round_number = config.pretraining_round_num
+        else:
+            self.pretraining_round_number = 2
+        # ----------------- params for normal distribution ----------------- #
+        self.mu = 0
+        self.sigma = 0
+        self.upper_bound = 0
+        self.lower_bound = 0
+        self.bound_param = 1  # values outside mu +- sigma are outliers
+
+        if hasattr(config, "to_keep_higher_scores") and isinstance(
+            config.to_keep_higher_scores, bool
+        ):
+            self.to_keep_higher_scores = config.to_keep_higher_scores
+        else:
+            self.to_keep_higher_scores = (
+                False  # true or false, depending on the score algo
+            )
+        self.score_function = "l2"
+
+    def run(
+        self,
+        raw_client_grad_list: List[Tuple[float, Dict]],
+        base_aggregation_func: Callable = None,
+        extra_auxiliary_info: Any = None,
+    ):
+        grad_list = self.defend_before_aggregation(
+            raw_client_grad_list, extra_auxiliary_info
+        )
+        return self.defend_on_aggregation(
+            grad_list, base_aggregation_func, extra_auxiliary_info
+        )
+
+    def defend_before_aggregation(
+        self,
+        raw_client_grad_list: List[Tuple[float, Dict]],
+        extra_auxiliary_info: Any = None,
+    ):
+        client_scores = self.compute_client_scores(raw_client_grad_list)
+        print(f"client scores = {client_scores}")
+        if self.iteration_num < self.pretraining_round_number:
+            self.score_list.extend(list(client_scores))
+            self.mu, self.sigma = self.compute_gaussian_distribution()
+            # if self.mu + self.bound_param * self.sigma >= 0:
+            self.upper_bound = self.mu + self.bound_param * self.sigma
+            # if self.mu - self.bound_param * self.sigma <= 0:
+            self.lower_bound = self.mu - self.bound_param * self.sigma
+            self.iteration_num += 1
+
+        for i in range(
+            len(client_scores) - 1, -1, -1
+        ):  # traverse the score list in a reversed order
+            if (
+                not self.to_keep_higher_scores and client_scores[i] > self.upper_bound
+            ) or (self.to_keep_higher_scores and client_scores[i] < self.lower_bound):
+                # here we do not remove the score in self.score_list to avoid mis-deleting
+                # due to severe non-iid among clients
+                raw_client_grad_list.pop(i)
+                print(f"pop -- i = {i}")
+        batch_grad_list = Bucket.bucketization(
+            raw_client_grad_list, self.bucketing_batch_size
+        )
+        return batch_grad_list
+
+    def compute_gaussian_distribution(self):
+        n = len(self.score_list)
+        mu = sum(list(self.score_list)) / n
+        temp = 0
+
+        for i in range(len(self.score_list)):
+            temp = (((self.score_list[i] - mu) ** 2) / (n - 1)) + temp
+        sigma = math.sqrt(temp)
+        print(f"mu = {mu}, sigma = {sigma}")
+        return mu, sigma
+
+    def compute_client_scores(self, raw_client_grad_list):
+        importance_feature_list = self._get_importance_feature(raw_client_grad_list)
+        if self.score_function == "l2":
+            if self.median is None:
+                # (num0, avg_params) = raw_client_grad_list[0]
+                # alphas = {alpha for (alpha, params) in raw_client_grad_list}
+                # alphas = {alpha / sum(alphas, 0.0) for alpha in alphas}
+                krum_scores = self._compute_krum_score(importance_feature_list)
+
+                score_index = torch.argsort(
+                    torch.Tensor(krum_scores)
+                ).tolist()  # indices; ascending
+                score_index = score_index[0 : math.floor(len(raw_client_grad_list) / 2)]
+                alphas = [1 / len(raw_client_grad_list)] * len(raw_client_grad_list)
+                honest_importance_feature_list = [
+                    importance_feature_list[i] for i in score_index
+                ]
+                self.median = compute_middle_point(
+                    alphas, honest_importance_feature_list
+                )
+            return self.l2_scores(importance_feature_list)
+
+    def l2_scores(self, importance_feature_list):
+        scores = []
+        for feature in importance_feature_list:
+            score = compute_euclidean_distance(torch.Tensor(feature), self.median)
+            scores.append(score)
+        return scores
+
+    def _compute_krum_score(self, vec_grad_list):
+        krum_scores = []
+        num_client = len(vec_grad_list)
+        for i in range(0, num_client):
+            dists = []
+            for j in range(0, num_client):
+                if i != j:
+                    dists.append(
+                        compute_euclidean_distance(
+                            torch.Tensor(vec_grad_list[i]),
+                            torch.Tensor(vec_grad_list[j]),
+                        ).item()
+                    )
+            dists.sort()  # ascending
+            score = dists[0 : math.floor(num_client / 2)]
+            krum_scores.append(sum(score))
+        return krum_scores
+
+    def _get_importance_feature(self, raw_client_grad_list):
+        # print(f"raw_client_grad_list = {raw_client_grad_list}")
+        # Foolsgold uses the last layer's gradient/weights as the importance feature.
+        ret_feature_vector_list = []
+        for idx in range(len(raw_client_grad_list)):
+            raw_grad = raw_client_grad_list[idx]
+            (p, grads) = raw_grad
+
+            # Get last key-value tuple
+            (weight_name, importance_feature) = list(grads.items())[-2]
+            # print(importance_feature)
+            feature_len = np.array(
+                importance_feature.cpu().data.detach().numpy().shape
+            ).prod()
+            feature_vector = np.reshape(
+                importance_feature.cpu().data.detach().numpy(), feature_len
+            )
+            ret_feature_vector_list.append(feature_vector)
+        return ret_feature_vector_list
diff --git a/python/fedml/core/security/fedml_defender.py b/python/fedml/core/security/fedml_defender.py
index b4e2060d3a..8fdaee78ef 100644
--- a/python/fedml/core/security/fedml_defender.py
+++ b/python/fedml/core/security/fedml_defender.py
@@ -4,6 +4,7 @@
 from .defense.coordinate_wise_trimmed_mean_defense import CoordinateWiseTrimmedMeanDefense
 from .defense.crfl_defense import CRFLDefense
 from .defense.three_sigma_defense import ThreeSigmaDefense
+from .defense.three_sigma_geomedian_defense import ThreeSigmaGeoMedianDefense
 from ..common.ml_engine_backend import MLEngineBackend
 from .defense.cclip_defense import CClipDefense
 from .defense.foolsgold_defense import FoolsGoldDefense
@@ -27,6 +28,7 @@
     DEFENSE_CRFL,
     DEFENSE_MULTIKRUM,
     DEFENSE_TRIMMED_MEAN,
+    DEFENSE_THREESIGMA_GEOMEDIAN,
 )
 
 
@@ -73,6 +75,8 @@ def init(self, args):
                 self.defender = FoolsGoldDefense(args)
             elif self.defense_type == DEFENSE_THREESIGMA:
                 self.defender = ThreeSigmaDefense(args)
+            elif self.defense_type == DEFENSE_THREESIGMA_GEOMEDIAN:
+                self.defender = ThreeSigmaGeoMedianDefense(args)
             elif self.defense_type == DEFENSE_CRFL:
                 self.defender = CRFLDefense(args)
             elif self.defense_type == DEFENSE_TRIMMED_MEAN:
@@ -125,6 +129,7 @@ def is_defense_before_aggregation(self):
             DEFENSE_SLSGD,
             DEFENSE_FOOLSGOLD,
             DEFENSE_THREESIGMA,
+            DEFENSE_THREESIGMA_GEOMEDIAN,
             DEFENSE_KRUM,
             DEFENSE_MULTIKRUM,
             DEFENSE_TRIMMED_MEAN
diff --git a/python/fedml/cross_silo/client/fedml_client_master_manager.py b/python/fedml/cross_silo/client/fedml_client_master_manager.py
index 69b2625ee5..f39640bfed 100644
--- a/python/fedml/cross_silo/client/fedml_client_master_manager.py
+++ b/python/fedml/cross_silo/client/fedml_client_master_manager.py
@@ -71,8 +71,8 @@ def handle_message_init(self, msg_params):
             global_model_params = convert_model_params_to_ddp(global_model_params)
             self.sync_process_group(0, global_model_params, data_silo_index)
 
-        self.trainer_dist_adapter.update_model(global_model_params)
         self.trainer_dist_adapter.update_dataset(int(data_silo_index))
+        self.trainer_dist_adapter.update_model(global_model_params)
         self.round_idx = 0
 
         self.__train()
@@ -86,9 +86,9 @@ def handle_message_receive_model_from_server(self, msg_params):
             model_params = convert_model_params_to_ddp(model_params)
             self.sync_process_group(self.round_idx, model_params, client_index)
 
-        self.trainer_dist_adapter.update_model(model_params)
         self.trainer_dist_adapter.update_dataset(int(client_index))
         logging.info("current roundx {}, num rounds {}".format(self.round_idx, self.num_rounds))
+        self.trainer_dist_adapter.update_model(model_params)
         if self.round_idx == self.num_rounds:
             mlops.log_training_finished_status()
             return
diff --git a/python/fedml/cross_silo/client/fedml_trainer.py b/python/fedml/cross_silo/client/fedml_trainer.py
index b24125bb66..efc673d5f1 100755
--- a/python/fedml/cross_silo/client/fedml_trainer.py
+++ b/python/fedml/cross_silo/client/fedml_trainer.py
@@ -35,6 +35,7 @@ def __init__(
 
         self.device = device
         self.args = args
+        self.args.device = device
 
     def update_model(self, weights):
         self.trainer.set_model_params(weights)
@@ -47,6 +48,7 @@ def update_dataset(self, client_index):
             self.train_local = self.train_data_local_dict[client_index]
         self.local_sample_number = self.train_data_local_num_dict[client_index]
         self.test_local = self.test_data_local_dict[client_index]
+        self.trainer.update_dataset(self.train_local, self.test_local, self.local_sample_number)
 
     def train(self, round_idx=None):
         self.args.round_idx = round_idx
diff --git a/python/fedml/cross_silo/lightsecagg/lsa_fedml_client_manager.py b/python/fedml/cross_silo/lightsecagg/lsa_fedml_client_manager.py
index 3f5c6f7b32..bb9d1185f4 100644
--- a/python/fedml/cross_silo/lightsecagg/lsa_fedml_client_manager.py
+++ b/python/fedml/cross_silo/lightsecagg/lsa_fedml_client_manager.py
@@ -94,8 +94,8 @@ def handle_message_init(self, msg_params):
 
         self.dimensions, self.total_dimension = model_dimension(global_model_params)
 
-        self.trainer.update_model(global_model_params)
         self.trainer.update_dataset(int(client_index))
+        self.trainer.update_model(global_model_params)
         self.round_idx = 0
         self.__offline()
 
@@ -118,8 +118,8 @@ def handle_message_receive_model_from_server(self, msg_params):
         model_params = msg_params.get(MyMessage.MSG_ARG_KEY_MODEL_PARAMS)
         client_index = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_INDEX)
 
-        self.trainer.update_model(model_params)
         self.trainer.update_dataset(int(client_index))
+        self.trainer.update_model(model_params)
 
         if self.round_idx == self.num_rounds - 1:
             mlops.log_training_finished_status()
diff --git a/python/fedml/cross_silo/secagg/sa_fedml_client_manager.py b/python/fedml/cross_silo/secagg/sa_fedml_client_manager.py
index ee3acfb5af..a984541a8b 100644
--- a/python/fedml/cross_silo/secagg/sa_fedml_client_manager.py
+++ b/python/fedml/cross_silo/secagg/sa_fedml_client_manager.py
@@ -95,8 +95,9 @@ def handle_message_init(self, msg_params):
 
         self.dimensions, self.total_dimension = model_dimension(global_model_params)
 
-        self.trainer.update_model(global_model_params)
         self.trainer.update_dataset(int(client_index))
+        self.trainer.update_model(global_model_params)
+
         self.round_idx = 0
         self.__offline()
 
@@ -105,8 +106,8 @@ def handle_message_receive_model_from_server(self, msg_params):
         model_params = msg_params.get(MyMessage.MSG_ARG_KEY_MODEL_PARAMS)
         client_index = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_INDEX)
 
-        self.trainer.update_model(model_params)
         self.trainer.update_dataset(int(client_index))
+        self.trainer.update_model(model_params)
 
         if self.round_idx == self.num_rounds - 1:
             mlops.log_training_finished_status()
diff --git a/python/fedml/cross_silo/server/fedml_aggregator.py b/python/fedml/cross_silo/server/fedml_aggregator.py
index f4480b533e..a2f6aa1e81 100644
--- a/python/fedml/cross_silo/server/fedml_aggregator.py
+++ b/python/fedml/cross_silo/server/fedml_aggregator.py
@@ -4,24 +4,24 @@
 
 import numpy as np
 import torch
-
 from fedml import mlops
+
 from ...ml.engine import ml_engine_adapter
 
 
 class FedMLAggregator(object):
     def __init__(
-        self,
-        train_global,
-        test_global,
-        all_train_data_num,
-        train_data_local_dict,
-        test_data_local_dict,
-        train_data_local_num_dict,
-        client_num,
-        device,
-        args,
-        server_aggregator,
+            self,
+            train_global,
+            test_global,
+            all_train_data_num,
+            train_data_local_dict,
+            test_data_local_dict,
+            train_data_local_num_dict,
+            client_num,
+            device,
+            args,
+            server_aggregator,
     ):
         self.aggregator = server_aggregator
 
@@ -37,6 +37,7 @@ def __init__(
 
         self.client_num = client_num
         self.device = device
+        self.args.device = device
         logging.info("self.device = {}".format(self.device))
         self.model_dict = dict()
         self.sample_num_dict = dict()
@@ -53,7 +54,9 @@ def set_global_model_params(self, model_parameters):
     def add_local_trained_result(self, index, model_params, sample_num):
         logging.info("add_model. index = %d" % index)
 
-        model_params = ml_engine_adapter.model_params_to_device(self.args, model_params, self.device)
+        # for dictionary model_params, we let the user level code to control the device
+        if type(model_params) is not dict:
+            model_params = ml_engine_adapter.model_params_to_device(self.args, model_params, self.device)
 
         self.model_dict[index] = model_params
         self.sample_num_dict[index] = sample_num
@@ -76,7 +79,12 @@ def aggregate(self):
             model_list.append((self.sample_num_dict[idx], self.model_dict[idx]))
         model_list = self.aggregator.on_before_aggregation(model_list)
         averaged_params = self.aggregator.aggregate(model_list)
-        averaged_params = self.aggregator.on_after_aggregation(averaged_params)
+
+        if type(averaged_params) is dict:
+            for client_index in range(len(averaged_params)):
+                averaged_params[client_index] = self.aggregator.on_after_aggregation(averaged_params[client_index])
+        else:
+            averaged_params = self.aggregator.on_after_aggregation(averaged_params)
 
         self.set_global_model_params(averaged_params)
 
diff --git a/python/fedml/cross_silo/server/fedml_server_manager.py b/python/fedml/cross_silo/server/fedml_server_manager.py
index 8cc799398b..649cbca556 100644
--- a/python/fedml/cross_silo/server/fedml_server_manager.py
+++ b/python/fedml/cross_silo/server/fedml_server_manager.py
@@ -3,6 +3,7 @@
 import time
 
 from fedml import mlops
+
 from .message_define import MyMessage
 from ...core.distributed.communication.message import Message
 from ...core.distributed.fedml_comm_manager import FedMLCommManager
@@ -11,7 +12,7 @@
 
 class FedMLServerManager(FedMLCommManager):
     def __init__(
-        self, args, aggregator, comm=None, client_rank=0, client_num=0, backend="MQTT_S3",
+            self, args, aggregator, comm=None, client_rank=0, client_num=0, backend="MQTT_S3",
     ):
         super().__init__(args, comm, client_rank, client_num, backend)
         self.args = args
@@ -44,7 +45,7 @@ def send_init_msg(self):
     def register_message_receive_handlers(self):
         logging.info("register_message_receive_handlers------")
         self.register_message_receive_handler(
-            MyMessage.MSG_TYPE_CONNECTION_IS_READY, self.handle_messag_connection_ready
+            MyMessage.MSG_TYPE_CONNECTION_IS_READY, self.handle_message_connection_ready
         )
 
         self.register_message_receive_handler(
@@ -55,7 +56,7 @@ def register_message_receive_handlers(self):
             MyMessage.MSG_TYPE_C2S_SEND_MODEL_TO_SERVER, self.handle_message_receive_model_from_client,
         )
 
-    def handle_messag_connection_ready(self, msg_params):
+    def handle_message_connection_ready(self, msg_params):
         self.client_id_list_in_this_round = self.aggregator.client_selection(
             self.args.round_idx, self.client_real_ids, self.args.client_num_per_round
         )
@@ -116,13 +117,6 @@ def handle_message_receive_model_from_client(self, msg_params):
         b_all_received = self.aggregator.check_whether_all_receive()
         logging.info("b_all_received = " + str(b_all_received))
         if b_all_received:
-            # if hasattr(self.args, "using_mlops") and self.args.using_mlops:
-            #     self.mlops_event.log_event_ended(
-            #         "server.wait", event_value=str(self.args.round_idx)
-            #     )
-            #     self.mlops_event.log_event_started(
-            #         "server.agg_and_eval", event_value=str(self.args.round_idx)
-            #     )
             mlops.event("server.wait", event_started=False, event_value=str(self.args.round_idx))
             mlops.event(
                 "server.agg_and_eval", event_started=True, event_value=str(self.args.round_idx),
@@ -150,9 +144,15 @@ def handle_message_receive_model_from_client(self, msg_params):
 
             client_idx_in_this_round = 0
             for receiver_id in self.client_id_list_in_this_round:
-                self.send_message_sync_model_to_client(
-                    receiver_id, global_model_params, self.data_silo_index_list[client_idx_in_this_round],
-                )
+                client_index = self.data_silo_index_list[client_idx_in_this_round]
+                if type(global_model_params) is dict:
+                    self.send_message_sync_model_to_client(
+                        receiver_id, global_model_params[client_index], client_index,
+                    )
+                else:
+                    self.send_message_sync_model_to_client(
+                        receiver_id, global_model_params, client_index,
+                    )
                 client_idx_in_this_round += 1
 
             self.args.round_idx += 1
@@ -161,10 +161,6 @@ def handle_message_receive_model_from_client(self, msg_params):
                 self.cleanup()
             else:
                 logging.info("\n\n==========end {}-th round training===========\n".format(self.args.round_idx))
-                # if hasattr(self.args, "using_mlops") and self.args.using_mlops:
-                #     self.mlops_event.log_event_started(
-                #         "server.wait", event_value=str(self.args.round_idx)
-                #     )
                 mlops.event("server.wait", event_started=True, event_value=str(self.args.round_idx))
 
     def cleanup(self):
@@ -202,7 +198,7 @@ def send_message_finish(self, receive_id, datasilo_index):
     def send_message_sync_model_to_client(self, receive_id, global_model_params, client_index):
         tick = time.time()
         logging.info("send_message_sync_model_to_client. receive_id = %d" % receive_id)
-        message = Message(MyMessage.MSG_TYPE_S2C_SYNC_MODEL_TO_CLIENT, self.get_sender_id(), receive_id,)
+        message = Message(MyMessage.MSG_TYPE_S2C_SYNC_MODEL_TO_CLIENT, self.get_sender_id(), receive_id, )
         message.add_params(MyMessage.MSG_ARG_KEY_MODEL_PARAMS, global_model_params)
         message.add_params(MyMessage.MSG_ARG_KEY_CLIENT_INDEX, str(client_index))
         message.add_params(MyMessage.MSG_ARG_KEY_CLIENT_OS, "PythonClient")
diff --git a/python/fedml/simulation/mpi/async_fedavg/AsyncFedAVGTrainer.py b/python/fedml/simulation/mpi/async_fedavg/AsyncFedAVGTrainer.py
index a1233eafab..1265fb298a 100644
--- a/python/fedml/simulation/mpi/async_fedavg/AsyncFedAVGTrainer.py
+++ b/python/fedml/simulation/mpi/async_fedavg/AsyncFedAVGTrainer.py
@@ -42,9 +42,6 @@ def train(self, round_idx=None):
 
         weights = self.trainer.get_model_params()
 
-        # transform Tensor to list
-        if self.args.is_mobile == 1:
-            weights = transform_tensor_to_list(weights)
         return weights, self.local_sample_number
 
     def test(self):
diff --git a/python/fedml/simulation/mpi/async_fedavg/AsyncFedAvgClientManager.py b/python/fedml/simulation/mpi/async_fedavg/AsyncFedAvgClientManager.py
index 936fcfbd33..3e8fb9af07 100644
--- a/python/fedml/simulation/mpi/async_fedavg/AsyncFedAvgClientManager.py
+++ b/python/fedml/simulation/mpi/async_fedavg/AsyncFedAvgClientManager.py
@@ -42,8 +42,6 @@ def handle_message_init(self, msg_params):
         global_model_params = msg_params.get(MyMessage.MSG_ARG_KEY_MODEL_PARAMS)
         client_index = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_INDEX)
 
-        if self.args.is_mobile == 1:
-            global_model_params = transform_list_to_tensor(global_model_params)
         self.round_idx = 0
         self.__train(global_model_params, client_index)
 
@@ -56,9 +54,6 @@ def handle_message_receive_model_from_server(self, msg_params):
         global_model_params = msg_params.get(MyMessage.MSG_ARG_KEY_MODEL_PARAMS)
         client_index = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_INDEX)
 
-        if self.args.is_mobile == 1:
-            model_params = transform_list_to_tensor(global_model_params)
-
         self.round_idx += 1
         self.__train(global_model_params, client_index)
         if self.round_idx == self.num_rounds - 1:
diff --git a/python/fedml/simulation/mpi/async_fedavg/AsyncFedAvgServerManager.py b/python/fedml/simulation/mpi/async_fedavg/AsyncFedAvgServerManager.py
index d6d430e050..2a456df3e5 100644
--- a/python/fedml/simulation/mpi/async_fedavg/AsyncFedAvgServerManager.py
+++ b/python/fedml/simulation/mpi/async_fedavg/AsyncFedAvgServerManager.py
@@ -47,8 +47,6 @@ def send_init_msg(self):
             range(self.args.client_num_in_total), num_clients, replace=False
         )
         global_model_params = self.aggregator.get_global_model_params()
-        if self.args.is_mobile == 1:
-            global_model_params = transform_tensor_to_list(global_model_params)
         for process_id in range(1, self.size):
             self.send_message_init_config(
                 process_id, global_model_params, client_indexes[process_id - 1]
@@ -96,13 +94,9 @@ def handle_message_receive_model_from_client(self, msg_params):
             )
 
         global_model_params = self.aggregator.get_global_model_params()
-        if self.args.is_mobile == 1:
-            global_model_params = transform_tensor_to_list(global_model_params)
 
         print("indexes of clients: " + str(client_indexes))
         print("size = %d" % self.size)
-        if self.args.is_mobile == 1:
-            global_model_params = transform_tensor_to_list(global_model_params)
 
         self.send_message_sync_model_to_client(
             sender_id, global_model_params, 
diff --git a/python/fedml/simulation/mpi/fedavg/FedAVGAggregator.py b/python/fedml/simulation/mpi/fedavg/FedAVGAggregator.py
index 7437fb82ce..f3121cc0b7 100644
--- a/python/fedml/simulation/mpi/fedavg/FedAVGAggregator.py
+++ b/python/fedml/simulation/mpi/fedavg/FedAVGAggregator.py
@@ -69,8 +69,6 @@ def aggregate(self):
         model_list = []
 
         for idx in range(self.worker_num):
-            if self.args.is_mobile == 1:
-                self.model_dict[idx] = transform_list_to_tensor(self.model_dict[idx])
             model_list.append((self.sample_num_dict[idx], self.model_dict[idx]))
             # training_num += self.sample_num_dict[idx]
         logging.info("len of self.model_dict[idx] = " + str(len(self.model_dict)))
diff --git a/python/fedml/simulation/mpi/fedavg/FedAVGAggregator.py.orig b/python/fedml/simulation/mpi/fedavg/FedAVGAggregator.py.orig
index d4721e5809..286b28767b 100644
--- a/python/fedml/simulation/mpi/fedavg/FedAVGAggregator.py.orig
+++ b/python/fedml/simulation/mpi/fedavg/FedAVGAggregator.py.orig
@@ -72,8 +72,6 @@ class FedAVGAggregator(object):
         training_num = 0
 
         for idx in range(self.worker_num):
-            if self.args.is_mobile == 1:
-                self.model_dict[idx] = transform_list_to_tensor(self.model_dict[idx])
             model_list.append((self.sample_num_dict[idx], self.model_dict[idx]))
             # training_num += self.sample_num_dict[idx]
         logging.info("len of self.model_dict[idx] = " + str(len(self.model_dict)))
diff --git a/python/fedml/simulation/mpi/fedavg/FedAVGTrainer.py b/python/fedml/simulation/mpi/fedavg/FedAVGTrainer.py
index 296f2736ce..0f039efcb2 100644
--- a/python/fedml/simulation/mpi/fedavg/FedAVGTrainer.py
+++ b/python/fedml/simulation/mpi/fedavg/FedAVGTrainer.py
@@ -42,9 +42,6 @@ def train(self, round_idx=None):
 
         weights = self.trainer.get_model_params()
 
-        # transform Tensor to list
-        if self.args.is_mobile == 1:
-            weights = transform_tensor_to_list(weights)
         return weights, self.local_sample_number
 
     def test(self):
diff --git a/python/fedml/simulation/mpi/fedavg/FedAvgClientManager.py b/python/fedml/simulation/mpi/fedavg/FedAvgClientManager.py
index e2e1cb51be..2cc81658b9 100644
--- a/python/fedml/simulation/mpi/fedavg/FedAvgClientManager.py
+++ b/python/fedml/simulation/mpi/fedavg/FedAvgClientManager.py
@@ -38,9 +38,6 @@ def handle_message_init(self, msg_params):
         global_model_params = msg_params.get(MyMessage.MSG_ARG_KEY_MODEL_PARAMS)
         client_index = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_INDEX)
 
-        if self.args.is_mobile == 1:
-            global_model_params = transform_list_to_tensor(global_model_params)
-
         self.trainer.update_model(global_model_params)
         self.trainer.update_dataset(int(client_index))
         self.args.round_idx = 0
@@ -55,9 +52,6 @@ def handle_message_receive_model_from_server(self, msg_params):
         model_params = msg_params.get(MyMessage.MSG_ARG_KEY_MODEL_PARAMS)
         client_index = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_INDEX)
 
-        if self.args.is_mobile == 1:
-            model_params = transform_list_to_tensor(model_params)
-
         self.trainer.update_model(model_params)
         self.trainer.update_dataset(int(client_index))
         self.args.round_idx += 1
diff --git a/python/fedml/simulation/mpi/fedavg/FedAvgServerManager.py b/python/fedml/simulation/mpi/fedavg/FedAvgServerManager.py
index db58ae4a91..4ba36f43a3 100644
--- a/python/fedml/simulation/mpi/fedavg/FedAvgServerManager.py
+++ b/python/fedml/simulation/mpi/fedavg/FedAvgServerManager.py
@@ -37,8 +37,6 @@ def send_init_msg(self):
             self.args.client_num_per_round,
         )
         global_model_params = self.aggregator.get_global_model_params()
-        if self.args.is_mobile == 1:
-            global_model_params = transform_tensor_to_list(global_model_params)
         for process_id in range(1, self.size):
             self.send_message_init_config(
                 process_id, global_model_params, client_indexes[process_id - 1]
@@ -87,8 +85,6 @@ def handle_message_receive_model_from_client(self, msg_params):
 
             print("indexes of clients: " + str(client_indexes))
             print("size = %d" % self.size)
-            if self.args.is_mobile == 1:
-                global_model_params = transform_tensor_to_list(global_model_params)
 
             for receiver_id in range(1, self.size):
                 self.send_message_sync_model_to_client(
diff --git a/python/fedml/simulation/mpi/fedavg_seq/FedAVGAggregator.py b/python/fedml/simulation/mpi/fedavg_seq/FedAVGAggregator.py
index 66831dbe8d..5adb0e0208 100644
--- a/python/fedml/simulation/mpi/fedavg_seq/FedAVGAggregator.py
+++ b/python/fedml/simulation/mpi/fedavg_seq/FedAVGAggregator.py
@@ -204,9 +204,6 @@ def aggregate(self):
         training_num = 0
 
         for idx in range(self.worker_num):
-            if self.args.is_mobile == 1:
-                self.model_dict[idx] = transform_list_to_tensor(self.model_dict[idx])
-
             # added for attack & defense; enable multiple defenses
             # if FedMLDefender.get_instance().is_defense_enabled():
             #     self.model_dict[idx] = FedMLDefender.get_instance().defend(
diff --git a/python/fedml/simulation/mpi/fedavg_seq/FedAVGTrainer.py b/python/fedml/simulation/mpi/fedavg_seq/FedAVGTrainer.py
index 94df1412de..148eb9b7c3 100644
--- a/python/fedml/simulation/mpi/fedavg_seq/FedAVGTrainer.py
+++ b/python/fedml/simulation/mpi/fedavg_seq/FedAVGTrainer.py
@@ -62,9 +62,6 @@ def train(self, round_idx=None):
         self.trainer.train(self.train_local, self.device, self.args)
         weights = self.trainer.get_model_params()
 
-        # transform Tensor to list
-        if self.args.is_mobile == 1:
-            weights = transform_tensor_to_list(weights)
         return weights, self.local_sample_number
 
     def test(self):
diff --git a/python/fedml/simulation/mpi/fedavg_seq/FedAvgClientManager.py b/python/fedml/simulation/mpi/fedavg_seq/FedAvgClientManager.py
index 89400b8522..7cfaa43056 100644
--- a/python/fedml/simulation/mpi/fedavg_seq/FedAvgClientManager.py
+++ b/python/fedml/simulation/mpi/fedavg_seq/FedAvgClientManager.py
@@ -35,8 +35,6 @@ def handle_message_init(self, msg_params):
         client_schedule = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_SCHEDULE)
         client_indexes = client_schedule[self.worker_id]
 
-        if self.args.is_mobile == 1:
-            global_model_params = transform_list_to_tensor(global_model_params)
         self.round_idx = 0
         self.__train(global_model_params, client_indexes, average_weight_dict)
 
@@ -53,9 +51,6 @@ def handle_message_receive_model_from_server(self, msg_params):
         client_schedule = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_SCHEDULE)
         client_indexes = client_schedule[self.worker_id]
 
-        if self.args.is_mobile == 1:
-            model_params = transform_list_to_tensor(global_model_params)
-
         self.round_idx += 1
         self.__train(global_model_params, client_indexes, average_weight_dict)
         if self.round_idx == self.num_rounds - 1:
diff --git a/python/fedml/simulation/mpi/fedavg_seq/FedAvgServerManager.py b/python/fedml/simulation/mpi/fedavg_seq/FedAvgServerManager.py
index 269023babd..f9269a50b9 100644
--- a/python/fedml/simulation/mpi/fedavg_seq/FedAvgServerManager.py
+++ b/python/fedml/simulation/mpi/fedavg_seq/FedAvgServerManager.py
@@ -44,8 +44,6 @@ def send_init_msg(self):
 
         global_model_params = self.aggregator.get_global_model_params()
 
-        if self.args.is_mobile == 1:
-            global_model_params = transform_tensor_to_list(global_model_params)
         for process_id in range(1, self.size):
             self.send_message_init_config(process_id, global_model_params, average_weight_dict, client_schedule)
 
@@ -104,13 +102,9 @@ def handle_message_receive_model_from_client(self, msg_params):
             average_weight_dict = self.aggregator.get_average_weight(client_indexes)
 
             global_model_params = self.aggregator.get_global_model_params()
-            if self.args.is_mobile == 1:
-                global_model_params = transform_tensor_to_list(global_model_params)
 
             print("indexes of clients: " + str(client_indexes))
             print("size = %d" % self.size)
-            if self.args.is_mobile == 1:
-                global_model_params = transform_tensor_to_list(global_model_params)
 
             for receiver_id in range(1, self.size):
                 self.send_message_sync_model_to_client(
diff --git a/python/fedml/simulation/mpi/fedgan/FedGANAggregator.py b/python/fedml/simulation/mpi/fedgan/FedGANAggregator.py
index f297882b85..826b2da7ec 100644
--- a/python/fedml/simulation/mpi/fedgan/FedGANAggregator.py
+++ b/python/fedml/simulation/mpi/fedgan/FedGANAggregator.py
@@ -72,8 +72,6 @@ def aggregate(self):
         training_num = 0
 
         for idx in range(self.worker_num):
-            if self.args.is_mobile == 1:
-                self.model_dict[idx] = transform_list_to_tensor(self.model_dict[idx])
             model_list.append((self.sample_num_dict[idx], self.model_dict[idx]))
             training_num += self.sample_num_dict[idx]
 
diff --git a/python/fedml/simulation/mpi/fedgan/FedGANTrainer.py b/python/fedml/simulation/mpi/fedgan/FedGANTrainer.py
index fe792b40ec..d9caae6958 100644
--- a/python/fedml/simulation/mpi/fedgan/FedGANTrainer.py
+++ b/python/fedml/simulation/mpi/fedgan/FedGANTrainer.py
@@ -41,9 +41,6 @@ def train(self, round_idx=None):
         self.trainer.train(self.train_local, self.device, self.args)
 
         weights = self.trainer.get_model_params()
-        # transform Tensor to list
-        if self.args.is_mobile == 1:
-            weights = transform_tensor_to_list(weights)
         return weights, self.local_sample_number
 
     def test(self):
diff --git a/python/fedml/simulation/mpi/fedgan/FedGanClientManager.py b/python/fedml/simulation/mpi/fedgan/FedGanClientManager.py
index 24875df4ef..df8dcc55bd 100644
--- a/python/fedml/simulation/mpi/fedgan/FedGanClientManager.py
+++ b/python/fedml/simulation/mpi/fedgan/FedGanClientManager.py
@@ -29,9 +29,6 @@ def handle_message_init(self, msg_params):
         global_model_params = msg_params.get(MyMessage.MSG_ARG_KEY_MODEL_PARAMS)
         client_index = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_INDEX)
 
-        if self.args.is_mobile == 1:
-            global_model_params = transform_list_to_tensor(global_model_params)
-
         self.trainer.update_model(global_model_params)
         self.trainer.update_dataset(int(client_index))
         self.args.round_idx = 0
@@ -46,9 +43,6 @@ def handle_message_receive_model_from_server(self, msg_params):
         model_params = msg_params.get(MyMessage.MSG_ARG_KEY_MODEL_PARAMS)
         client_index = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_INDEX)
 
-        if self.args.is_mobile == 1:
-            model_params = transform_list_to_tensor(model_params)
-
         self.trainer.update_model(model_params)
         self.trainer.update_dataset(int(client_index))
         self.args.round_idx += 1
diff --git a/python/fedml/simulation/mpi/fedgan/FedGanServerManager.py b/python/fedml/simulation/mpi/fedgan/FedGanServerManager.py
index 11325b7df0..15b8dc7390 100644
--- a/python/fedml/simulation/mpi/fedgan/FedGanServerManager.py
+++ b/python/fedml/simulation/mpi/fedgan/FedGanServerManager.py
@@ -37,8 +37,6 @@ def send_init_msg(self):
             self.args.client_num_per_round,
         )
         global_model_params = self.aggregator.get_global_model_params()
-        if self.args.is_mobile == 1:
-            global_model_params = transform_tensor_to_list(global_model_params)
         for process_id in range(1, self.size):
             self.send_message_init_config(
                 process_id, global_model_params, client_indexes[process_id - 1]
@@ -87,8 +85,6 @@ def handle_message_receive_model_from_client(self, msg_params):
 
             print("indexes of clients: " + str(client_indexes))
             print("size = %d" % self.size)
-            if self.args.is_mobile == 1:
-                global_model_params = transform_tensor_to_list(global_model_params)
 
             for receiver_id in range(1, self.size):
                 self.send_message_sync_model_to_client(
diff --git a/python/fedml/simulation/mpi/fednova/FedNovaAggregator.py b/python/fedml/simulation/mpi/fednova/FedNovaAggregator.py
index 9940cdfbb2..71fb4743c0 100644
--- a/python/fedml/simulation/mpi/fednova/FedNovaAggregator.py
+++ b/python/fedml/simulation/mpi/fednova/FedNovaAggregator.py
@@ -172,9 +172,6 @@ def aggregate(self):
         training_num = 0
 
         for idx in range(self.worker_num):
-            if self.args.is_mobile == 1:
-                self.result_dict[idx] = transform_list_to_tensor(self.result_dict[idx])
-
             if len(self.result_dict[idx]) > 0:
                 # some workers may not have parameters
                 # for client_index, client_result in self.result_dict[idx].items():
diff --git a/python/fedml/simulation/mpi/fednova/FedNovaClientManager.py b/python/fedml/simulation/mpi/fednova/FedNovaClientManager.py
index d4160d578a..ad2a99b2ed 100644
--- a/python/fedml/simulation/mpi/fednova/FedNovaClientManager.py
+++ b/python/fedml/simulation/mpi/fednova/FedNovaClientManager.py
@@ -44,8 +44,6 @@ def handle_message_init(self, msg_params):
         client_schedule = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_SCHEDULE)
         client_indexes = client_schedule[self.worker_id]
 
-        if self.args.is_mobile == 1:
-            global_model_params = transform_list_to_tensor(global_model_params)
         self.round_idx = 0
         self.__train(global_model_params, client_indexes, average_weight_dict)
 
@@ -62,9 +60,6 @@ def handle_message_receive_model_from_server(self, msg_params):
         client_schedule = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_SCHEDULE)
         client_indexes = client_schedule[self.worker_id]
 
-        if self.args.is_mobile == 1:
-            model_params = transform_list_to_tensor(global_model_params)
-
         self.round_idx += 1
         self.__train(global_model_params, client_indexes, average_weight_dict)
         if self.round_idx == self.num_rounds - 1:
diff --git a/python/fedml/simulation/mpi/fednova/FedNovaServerManager.py b/python/fedml/simulation/mpi/fednova/FedNovaServerManager.py
index 44c130cd5f..97d257dbe2 100644
--- a/python/fedml/simulation/mpi/fednova/FedNovaServerManager.py
+++ b/python/fedml/simulation/mpi/fednova/FedNovaServerManager.py
@@ -46,8 +46,6 @@ def send_init_msg(self):
 
         global_model_params = self.aggregator.get_global_model_params()
 
-        if self.args.is_mobile == 1:
-            global_model_params = transform_tensor_to_list(global_model_params)
         for process_id in range(1, self.size):
             self.send_message_init_config(
                 process_id, global_model_params, 
@@ -104,13 +102,9 @@ def handle_message_receive_model_from_client(self, msg_params):
             average_weight_dict = self.aggregator.get_average_weight(client_indexes)
 
             global_model_params = self.aggregator.get_global_model_params()
-            if self.args.is_mobile == 1:
-                global_model_params = transform_tensor_to_list(global_model_params)
 
             print("indexes of clients: " + str(client_indexes))
             print("size = %d" % self.size)
-            if self.args.is_mobile == 1:
-                global_model_params = transform_tensor_to_list(global_model_params)
 
             for receiver_id in range(1, self.size):
                 self.send_message_sync_model_to_client(
diff --git a/python/fedml/simulation/mpi/fednova/FedNovaTrainer.py b/python/fedml/simulation/mpi/fednova/FedNovaTrainer.py
index 9248b82338..d55e6d9822 100644
--- a/python/fedml/simulation/mpi/fednova/FedNovaTrainer.py
+++ b/python/fedml/simulation/mpi/fednova/FedNovaTrainer.py
@@ -64,9 +64,6 @@ def train(self, round_idx=None):
             ratio=self.local_sample_number / self.total_train_num)
         # weights = self.trainer.get_model_params()
 
-        # transform Tensor to list
-        if self.args.is_mobile == 1:
-            weights = transform_tensor_to_list(weights)
         # return weights, self.local_sample_number
         return avg_loss, norm_grad, tau_eff
 
diff --git a/python/fedml/simulation/mpi/fedopt/FedOptAggregator.py b/python/fedml/simulation/mpi/fedopt/FedOptAggregator.py
index 00d9712a8e..7000760666 100644
--- a/python/fedml/simulation/mpi/fedopt/FedOptAggregator.py
+++ b/python/fedml/simulation/mpi/fedopt/FedOptAggregator.py
@@ -83,8 +83,6 @@ def aggregate(self):
         training_num = 0
 
         for idx in range(self.worker_num):
-            if self.args.is_mobile == 1:
-                self.model_dict[idx] = transform_list_to_tensor(self.model_dict[idx])
             model_list.append((self.sample_num_dict[idx], self.model_dict[idx]))
             training_num += self.sample_num_dict[idx]
 
diff --git a/python/fedml/simulation/mpi/fedopt/FedOptClientManager.py b/python/fedml/simulation/mpi/fedopt/FedOptClientManager.py
index 527487aa55..63222972ea 100644
--- a/python/fedml/simulation/mpi/fedopt/FedOptClientManager.py
+++ b/python/fedml/simulation/mpi/fedopt/FedOptClientManager.py
@@ -29,9 +29,6 @@ def handle_message_init(self, msg_params):
         global_model_params = msg_params.get(MyMessage.MSG_ARG_KEY_MODEL_PARAMS)
         client_index = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_INDEX)
 
-        if self.args.is_mobile == 1:
-            global_model_params = transform_list_to_tensor(global_model_params)
-
         self.trainer.update_model(global_model_params)
         self.trainer.update_dataset(int(client_index))
         self.args.round_idx = 0
@@ -46,9 +43,6 @@ def handle_message_receive_model_from_server(self, msg_params):
         model_params = msg_params.get(MyMessage.MSG_ARG_KEY_MODEL_PARAMS)
         client_index = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_INDEX)
 
-        if self.args.is_mobile == 1:
-            model_params = transform_list_to_tensor(model_params)
-
         self.trainer.update_model(model_params)
         self.trainer.update_dataset(int(client_index))
         self.args.round_idx += 1
diff --git a/python/fedml/simulation/mpi/fedopt/FedOptServerManager.py b/python/fedml/simulation/mpi/fedopt/FedOptServerManager.py
index 73a387f12f..febbb4ac39 100644
--- a/python/fedml/simulation/mpi/fedopt/FedOptServerManager.py
+++ b/python/fedml/simulation/mpi/fedopt/FedOptServerManager.py
@@ -85,9 +85,6 @@ def handle_message_receive_model_from_client(self, msg_params):
                 )
 
             print("size = %d" % self.size)
-            if self.args.is_mobile == 1:
-                print("transform_tensor_to_list")
-                global_model_params = transform_tensor_to_list(global_model_params)
 
             for receiver_id in range(1, self.size):
                 self.send_message_sync_model_to_client(
diff --git a/python/fedml/simulation/mpi/fedopt/FedOptTrainer.py b/python/fedml/simulation/mpi/fedopt/FedOptTrainer.py
index 8680f55369..00661f35b0 100644
--- a/python/fedml/simulation/mpi/fedopt/FedOptTrainer.py
+++ b/python/fedml/simulation/mpi/fedopt/FedOptTrainer.py
@@ -38,7 +38,4 @@ def train(self, round_idx=None):
 
         weights = self.trainer.get_model_params()
 
-        # transform Tensor to list
-        if self.args.is_mobile == 1:
-            weights = transform_tensor_to_list(weights)
         return weights, self.local_sample_number
diff --git a/python/fedml/simulation/mpi/fedopt_seq/FedOptAggregator.py b/python/fedml/simulation/mpi/fedopt_seq/FedOptAggregator.py
index b2de3c1e72..dc91017c3b 100644
--- a/python/fedml/simulation/mpi/fedopt_seq/FedOptAggregator.py
+++ b/python/fedml/simulation/mpi/fedopt_seq/FedOptAggregator.py
@@ -213,8 +213,6 @@ def aggregate(self):
         training_num = 0
 
         for idx in range(self.worker_num):
-            if self.args.is_mobile == 1:
-                self.model_dict[idx] = transform_list_to_tensor(self.model_dict[idx])
             if len(self.model_dict[idx]) > 0:
                 # some workers may not have parameters 
                 model_list.append(self.model_dict[idx])
diff --git a/python/fedml/simulation/mpi/fedopt_seq/FedOptClientManager.py b/python/fedml/simulation/mpi/fedopt_seq/FedOptClientManager.py
index 484306dade..3ec4cdf370 100644
--- a/python/fedml/simulation/mpi/fedopt_seq/FedOptClientManager.py
+++ b/python/fedml/simulation/mpi/fedopt_seq/FedOptClientManager.py
@@ -35,9 +35,6 @@ def handle_message_init(self, msg_params):
         client_schedule = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_SCHEDULE)
         client_indexes = client_schedule[self.worker_id]
 
-        if self.args.is_mobile == 1:
-            global_model_params = transform_list_to_tensor(global_model_params)
-
         self.round_idx = 0
         self.__train(global_model_params, client_indexes, average_weight_dict)
 
@@ -53,9 +50,6 @@ def handle_message_receive_model_from_server(self, msg_params):
         client_schedule = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_SCHEDULE)
         client_indexes = client_schedule[self.worker_id]
 
-        if self.args.is_mobile == 1:
-            model_params = transform_list_to_tensor(global_model_params)
-
         self.round_idx += 1
         self.__train(global_model_params, client_indexes, average_weight_dict)
         if self.round_idx == self.num_rounds - 1:
diff --git a/python/fedml/simulation/mpi/fedopt_seq/FedOptServerManager.py b/python/fedml/simulation/mpi/fedopt_seq/FedOptServerManager.py
index 98cfbf856d..207fcf37ed 100644
--- a/python/fedml/simulation/mpi/fedopt_seq/FedOptServerManager.py
+++ b/python/fedml/simulation/mpi/fedopt_seq/FedOptServerManager.py
@@ -97,9 +97,6 @@ def handle_message_receive_model_from_client(self, msg_params):
             global_model_params = self.aggregator.get_global_model_params()
 
             print("size = %d" % self.size)
-            if self.args.is_mobile == 1:
-                print("transform_tensor_to_list")
-                global_model_params = transform_tensor_to_list(global_model_params)
 
             for receiver_id in range(1, self.size):
                 self.send_message_sync_model_to_client(
diff --git a/python/fedml/simulation/mpi/fedopt_seq/FedOptTrainer.py b/python/fedml/simulation/mpi/fedopt_seq/FedOptTrainer.py
index 8680f55369..00661f35b0 100644
--- a/python/fedml/simulation/mpi/fedopt_seq/FedOptTrainer.py
+++ b/python/fedml/simulation/mpi/fedopt_seq/FedOptTrainer.py
@@ -38,7 +38,4 @@ def train(self, round_idx=None):
 
         weights = self.trainer.get_model_params()
 
-        # transform Tensor to list
-        if self.args.is_mobile == 1:
-            weights = transform_tensor_to_list(weights)
         return weights, self.local_sample_number
diff --git a/python/fedml/simulation/mpi/fedprox/FedProxAggregator.py b/python/fedml/simulation/mpi/fedprox/FedProxAggregator.py
index 63c768b47e..026729e264 100644
--- a/python/fedml/simulation/mpi/fedprox/FedProxAggregator.py
+++ b/python/fedml/simulation/mpi/fedprox/FedProxAggregator.py
@@ -69,8 +69,6 @@ def aggregate(self):
         training_num = 0
 
         for idx in range(self.worker_num):
-            if self.args.is_mobile == 1:
-                self.model_dict[idx] = transform_list_to_tensor(self.model_dict[idx])
             model_list.append((self.sample_num_dict[idx], self.model_dict[idx]))
             training_num += self.sample_num_dict[idx]
 
diff --git a/python/fedml/simulation/mpi/fedprox/FedProxClientManager.py b/python/fedml/simulation/mpi/fedprox/FedProxClientManager.py
index 0375a0875b..860fe336b0 100644
--- a/python/fedml/simulation/mpi/fedprox/FedProxClientManager.py
+++ b/python/fedml/simulation/mpi/fedprox/FedProxClientManager.py
@@ -29,7 +29,6 @@ def handle_message_init(self, msg_params):
         global_model_params = msg_params.get(MyMessage.MSG_ARG_KEY_MODEL_PARAMS)
         client_index = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_INDEX)
 
-        # if self.args.is_mobile == 1:
         global_model_params = transform_list_to_tensor(global_model_params)
 
         self.trainer.update_model(global_model_params)
@@ -48,9 +47,6 @@ def handle_message_receive_model_from_server(self, msg_params):
 
         model_params = transform_list_to_tensor(model_params)
 
-        # if self.args.is_mobile == 1:
-        # model_params = transform_list_to_tensor(model_params)
-
         self.trainer.update_model(model_params)
         self.trainer.update_dataset(int(client_index))
         self.args.round_idx += 1
diff --git a/python/fedml/simulation/mpi/fedprox/FedProxServerManager.py b/python/fedml/simulation/mpi/fedprox/FedProxServerManager.py
index ed903f5d5a..ccf9f087cf 100644
--- a/python/fedml/simulation/mpi/fedprox/FedProxServerManager.py
+++ b/python/fedml/simulation/mpi/fedprox/FedProxServerManager.py
@@ -86,9 +86,6 @@ def handle_message_receive_model_from_client(self, msg_params):
 
             print("indexes of clients: " + str(client_indexes))
             print("size = %d" % self.size)
-            if self.args.is_mobile == 1:
-                print("transform_tensor_to_list")
-                global_model_params = transform_tensor_to_list(global_model_params)
 
             for receiver_id in range(1, self.size):
                 self.send_message_sync_model_to_client(
diff --git a/python/fedml/simulation/mpi/fedprox/FedProxTrainer.py b/python/fedml/simulation/mpi/fedprox/FedProxTrainer.py
index 8ddc4138e8..e77096b452 100644
--- a/python/fedml/simulation/mpi/fedprox/FedProxTrainer.py
+++ b/python/fedml/simulation/mpi/fedprox/FedProxTrainer.py
@@ -45,9 +45,6 @@ def train(self, round_idx=None):
 
         weights = self.trainer.get_model_params()
 
-        # transform Tensor to list
-        if self.args.is_mobile == 1:
-            weights = transform_tensor_to_list(weights)
         return weights, self.local_sample_number
 
     def test(self):
diff --git a/python/fedml/simulation/mpi/fedseg/FedSegAggregator.py b/python/fedml/simulation/mpi/fedseg/FedSegAggregator.py
index 435efcdf79..bb126cae8e 100644
--- a/python/fedml/simulation/mpi/fedseg/FedSegAggregator.py
+++ b/python/fedml/simulation/mpi/fedseg/FedSegAggregator.py
@@ -68,8 +68,6 @@ def aggregate(self):
         training_num = 0
 
         for idx in range(self.worker_num):
-            if self.args.is_mobile == 1:
-                self.model_dict[idx] = transform_list_to_tensor(self.model_dict[idx])
             model_list.append((self.sample_num_dict[idx], self.model_dict[idx]))
             training_num += self.sample_num_dict[idx]
 
diff --git a/python/fedml/simulation/mpi/fedseg/FedSegClientManager.py b/python/fedml/simulation/mpi/fedseg/FedSegClientManager.py
index d4635365d5..de6ce9f19b 100644
--- a/python/fedml/simulation/mpi/fedseg/FedSegClientManager.py
+++ b/python/fedml/simulation/mpi/fedseg/FedSegClientManager.py
@@ -33,10 +33,6 @@ def handle_message_init(self, msg_params):
                 client_index
             )
         )
-
-        if self.args.is_mobile == 1:
-            global_model_params = transform_list_to_tensor(global_model_params)
-
         self.trainer.update_model(global_model_params)
         self.trainer.update_dataset(int(client_index))
         self.args.round_idx = 0
@@ -51,9 +47,6 @@ def handle_message_receive_model_from_server(self, msg_params):
         model_params = msg_params.get(MyMessage.MSG_ARG_KEY_MODEL_PARAMS)
         client_index = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_INDEX)
 
-        if self.args.is_mobile == 1:
-            model_params = transform_list_to_tensor(model_params)
-
         self.trainer.update_model(model_params)
         self.trainer.update_dataset(int(client_index))
         self.args.round_idx += 1
diff --git a/python/fedml/simulation/mpi/fedseg/FedSegServerManager.py b/python/fedml/simulation/mpi/fedseg/FedSegServerManager.py
index eb5c55163b..d1382c6d77 100644
--- a/python/fedml/simulation/mpi/fedseg/FedSegServerManager.py
+++ b/python/fedml/simulation/mpi/fedseg/FedSegServerManager.py
@@ -76,10 +76,6 @@ def handle_message_receive_model_from_client(self, msg_params):
                 self.args.client_num_per_round,
             )
 
-            if self.args.is_mobile == 1:
-
-                global_model_params = transform_tensor_to_list(global_model_params)
-
             for receiver_id in range(1, self.size):
                 self.send_message_sync_model_to_client(
                     receiver_id, global_model_params, client_indexes[receiver_id - 1]
diff --git a/python/fedml/simulation/mpi/fedseg/FedSegTrainer.py b/python/fedml/simulation/mpi/fedseg/FedSegTrainer.py
index b05b5c9f32..f0bda08b47 100644
--- a/python/fedml/simulation/mpi/fedseg/FedSegTrainer.py
+++ b/python/fedml/simulation/mpi/fedseg/FedSegTrainer.py
@@ -43,9 +43,6 @@ def train(self):
         self.trainer.train(self.train_local, self.device)
         weights = self.trainer.get_model_params()
 
-        # transform Tensor to list
-        if self.args.is_mobile == 1:
-            weights = transform_tensor_to_list(weights)
         return weights, self.local_sample_number
 
     def test(self):
diff --git a/python/setup.py b/python/setup.py
index 36a5e2ab98..da052dd3f2 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -73,7 +73,7 @@ def finalize_options(self):
 
 setup(
     name="fedml",
-    version="0.7.327",
+    version="0.7.330",
     author="FedML Team",
     author_email="ch@fedml.ai",
     description="A research and production integrated edge-cloud library for "