diff --git a/python/app/fedcv/image_segmentation/config/gpu_mapping.yaml b/python/app/fedcv/image_segmentation/config/gpu_mapping.yaml index 1ba657af36..446c75e24a 100644 --- a/python/app/fedcv/image_segmentation/config/gpu_mapping.yaml +++ b/python/app/fedcv/image_segmentation/config/gpu_mapping.yaml @@ -1,4 +1,3 @@ -# Please check "GPU_MAPPING.md" to see how to define the topology # You can define a cluster containing multiple GPUs within multiple machines by defining `gpu_mapping.yaml` as follows: # config_cluster0: diff --git a/python/app/fedcv/image_segmentation/config/simulation/gpu_mapping.yaml b/python/app/fedcv/image_segmentation/config/simulation/gpu_mapping.yaml index 1ba657af36..446c75e24a 100644 --- a/python/app/fedcv/image_segmentation/config/simulation/gpu_mapping.yaml +++ b/python/app/fedcv/image_segmentation/config/simulation/gpu_mapping.yaml @@ -1,4 +1,3 @@ -# Please check "GPU_MAPPING.md" to see how to define the topology # You can define a cluster containing multiple GPUs within multiple machines by defining `gpu_mapping.yaml` as follows: # config_cluster0: diff --git a/python/app/fedcv/object_detection/model/yolov5/requirements.txt b/python/app/fedcv/object_detection/model/yolov5/requirements.txt index 4a4f68539c..ce68986a8c 100644 --- a/python/app/fedcv/object_detection/model/yolov5/requirements.txt +++ b/python/app/fedcv/object_detection/model/yolov5/requirements.txt @@ -12,7 +12,7 @@ scipy>=1.4.1 # Google Colab version torch>=1.7.0,!=1.12.0 # https://github.com/ultralytics/yolov5/issues/8395 torchvision>=0.8.1,!=0.13.0 # https://github.com/ultralytics/yolov5/issues/8395 tqdm>=4.41.0 -protobuf<4.21.3 # https://github.com/ultralytics/yolov5/issues/8012 +protobuf>=4.21.6 # https://github.com/ultralytics/yolov5/issues/8012 # Logging ------------------------------------- tensorboard>=2.4.1 diff --git a/python/app/fedcv/object_detection/model/yolov7/requirements.txt b/python/app/fedcv/object_detection/model/yolov7/requirements.txt index 6b71f21689..ed8045a364 100644 --- a/python/app/fedcv/object_detection/model/yolov7/requirements.txt +++ b/python/app/fedcv/object_detection/model/yolov7/requirements.txt @@ -11,7 +11,7 @@ scipy>=1.4.1 torch>=1.7.0,!=1.12.0 torchvision>=0.8.1,!=0.13.0 tqdm>=4.41.0 -protobuf<4.21.3 +protobuf>=4.21.6 # Logging ------------------------------------- tensorboard>=2.4.1 diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/README.md b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/README.md new file mode 100644 index 0000000000..d125847dd6 --- /dev/null +++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/README.md @@ -0,0 +1,31 @@ +## Training Script + +At the client side, the client ID (a.k.a rank) starts from 1. +Please also modify config/fedml_config.yaml, changing the `worker_num` the as the number of clients you plan to run. + +At the server side, run the following script: +``` +bash run_server.sh your_run_id +``` + +For client 1, run the following script: +``` +bash run_client.sh 1 your_run_id +``` +For client 2, run the following script: +``` +bash run_client.sh 2 your_run_id +``` +Note: please run the server first. + +## A Better User-experience with FedML MLOps (open.fedml.ai) +To reduce the difficulty and complexity of these CLI commands. We recommend you to use our MLOps (open.fedml.ai). +FedML MLOps provides: +- Install Client Agent and Login +- Inviting Collaborators and group management +- Project Management +- Experiment Tracking (visualizing training results) +- monitoring device status +- visualizing system performance (including profiling flow chart) +- distributed logging +- model serving \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/__init__.py b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/bootstrap.sh b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/bootstrap.sh new file mode 100644 index 0000000000..1f0287a7b6 --- /dev/null +++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/bootstrap.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +# pip install fedml==0.7.15 +#pip install --upgrade fedml + +### don't modify this part ### +echo "[FedML]Bootstrap Finished" +############################## diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/foolsgold/fedml_config.yaml b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/foolsgold/fedml_config.yaml new file mode 100644 index 0000000000..2c8a9001b9 --- /dev/null +++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/foolsgold/fedml_config.yaml @@ -0,0 +1,65 @@ +common_args: + training_type: "cross_silo" + scenario: "horizontal" + using_mlops: false + random_seed: 0 + config_version: release + +environment_args: + bootstrap: config/bootstrap.sh + +data_args: + dataset: "cifar10" + data_cache_dir: ~/fedml_data + partition_method: "homo" + partition_alpha: 0.5 + +model_args: + model: "resnet56" + model_file_cache_folder: "./model_file_cache" # will be filled by the server automatically + global_model_file_path: "./model_file_cache/global_model.pt" + +train_args: + federated_optimizer: "FedAvg" + # for CLI running, this can be None; in MLOps deployment, `client_id_list` will be replaced with real-time selected devices + client_id_list: + # for FoolsGold Defense, if use_memory is true, then client_num_in_total should be equal to client_number_per_round + client_num_in_total: 8 + client_num_per_round: 8 + comm_round: 10 + epochs: 1 + batch_size: 10 + client_optimizer: sgd + learning_rate: 0.03 + weight_decay: 0.001 + +validation_args: + frequency_of_the_test: 1 + +device_args: + worker_num: 8 + using_gpu: true + gpu_mapping_file: config/foolsgold/gpu_mapping.yaml + gpu_mapping_key: mapping_default + +comm_args: + backend: "MPI" + +tracking_args: + # the default log path is at ~/fedml-client/fedml/logs/ and ~/fedml-server/fedml/logs/ + enable_wandb: false + wandb_key: ee0b5f53d949c84cee7decbe7a629e63fb2f8408 + wandb_project: fedml + wandb_name: fedml_torch_fedavg_mnist_lr + + +attack_args: + enable_attack: true + attack_type: byzantine + attack_mode: random + byzantine_client_num: 1 + +# for FoolsGold Defense, if use_memory is true, then client_num_in_total should be equal to client_number_per_round +defense_args: + enable_defense: true + defense_type: foolsgold diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/foolsgold/gpu_mapping.yaml b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/foolsgold/gpu_mapping.yaml new file mode 100644 index 0000000000..79a9a634b1 --- /dev/null +++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/foolsgold/gpu_mapping.yaml @@ -0,0 +1,3 @@ +# this is used for 4 clients and 1 server training within a single machine which has 8 GPUs, but you hope to skip the GPU device ID. +mapping_default: + host1: [3, 2, 2, 2] # assume we only have 4 GPUs \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/gpu_mapping.yaml b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/gpu_mapping.yaml new file mode 100644 index 0000000000..446c75e24a --- /dev/null +++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/gpu_mapping.yaml @@ -0,0 +1,60 @@ +# You can define a cluster containing multiple GPUs within multiple machines by defining `gpu_mapping.yaml` as follows: + +# config_cluster0: +# host_name_node0: [num_of_processes_on_GPU0, num_of_processes_on_GPU1, num_of_processes_on_GPU2, num_of_processes_on_GPU3, ..., num_of_processes_on_GPU_n] +# host_name_node1: [num_of_processes_on_GPU0, num_of_processes_on_GPU1, num_of_processes_on_GPU2, num_of_processes_on_GPU3, ..., num_of_processes_on_GPU_n] +# host_name_node_m: [num_of_processes_on_GPU0, num_of_processes_on_GPU1, num_of_processes_on_GPU2, num_of_processes_on_GPU3, ..., num_of_processes_on_GPU_n] + + +# this is used for 10 clients and 1 server training within a single machine which has 4 GPUs +mapping_default: + ChaoyangHe-GPU-RTX2080Tix4: [3, 3, 3, 2] + +# this is used for 4 clients and 1 server training within a single machine which has 4 GPUs +mapping_config1_5: + host1: [2, 1, 1, 1] + +# this is used for 10 clients and 1 server training within a single machine which has 4 GPUs +mapping_config2_11: + host1: [3, 3, 3, 2] + +# this is used for 10 clients and 1 server training within a single machine which has 8 GPUs +mapping_config3_11: + host1: [2, 2, 2, 1, 1, 1, 1, 1] + +# this is used for 4 clients and 1 server training within a single machine which has 8 GPUs, but you hope to skip the GPU device ID. +mapping_config4_5: + host1: [1, 0, 0, 1, 1, 0, 1, 1] + +# this is used for 4 clients and 1 server training using 6 machines, each machine has 2 GPUs inside, but you hope to use the second GPU. +mapping_config5_6: + host1: [0, 1] + host2: [0, 1] + host3: [0, 1] + host4: [0, 1] + host5: [0, 1] +# this is used for 4 clients and 1 server training using 2 machines, each machine has 2 GPUs inside, but you hope to use the second GPU. +mapping_config5_2: + gpu-worker2: [1,1] + gpu-worker1: [2,1] + +# this is used for 10 clients and 1 server training using 4 machines, each machine has 2 GPUs inside, but you hope to use the second GPU. +mapping_config5_4: + gpu-worker2: [1,1] + gpu-worker1: [2,1] + gpu-worker3: [3,1] + gpu-worker4: [1,1] + +# for grpc GPU mapping +mapping_FedML_gRPC: + hostname_node_server: [1] + hostname_node_1: [1, 0, 0, 0] + hostname_node_2: [1, 0, 0, 0] + +# for torch RPC GPU mapping +mapping_FedML_tRPC: + lambda-server1: [0, 0, 0, 0, 2, 2, 1, 1] + lambda-server2: [2, 1, 1, 1, 0, 0, 0, 0] + +#mapping_FedML_tRPC: +# lambda-server1: [0, 0, 0, 0, 3, 3, 3, 2] \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/run_client.sh b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/run_client.sh new file mode 100644 index 0000000000..82f8c2dbc6 --- /dev/null +++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/run_client.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +RANK=$1 +RUN_ID=$2 +python3 torch_client.py --cf config/foolsgold/fedml_config.yaml --rank $RANK --role client --run_id $RUN_ID \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/run_mpi.sh b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/run_mpi.sh new file mode 100644 index 0000000000..ca02e299c0 --- /dev/null +++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/run_mpi.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +WORKER_NUM=$1 + +PROCESS_NUM=`expr $WORKER_NUM + 1` +echo $PROCESS_NUM + +hostname > mpi_host_file + +mpirun -np $PROCESS_NUM \ +-hostfile mpi_host_file \ +python torch_mpi.py --cf config/foolsgold/fedml_config.yaml \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/run_server.sh b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/run_server.sh new file mode 100644 index 0000000000..2714f440f5 --- /dev/null +++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/run_server.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +RUN_ID=$1 +python3 torch_server.py --cf config/foolsgold/fedml_config.yaml --rank 0 --role server --run_id $RUN_ID \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/torch_client.py b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/torch_client.py new file mode 100644 index 0000000000..0d507a94cb --- /dev/null +++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/torch_client.py @@ -0,0 +1,31 @@ +import logging + +import fedml +from fedml import FedMLRunner +from fedml.model.cv.resnet import resnet56 + + +def create_model(): + # please download the pre-trained weight file from + # https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar10_resnet44-2a3cabcb.pt + pre_trained_model_path = "./config/resnet56_on_cifar10.pth" + model = resnet56(10, pretrained=True, path=pre_trained_model_path) + logging.info("load pretrained model successfully") + return model + + +if __name__ == "__main__": + args = fedml.init() + + # init device + device = fedml.device.get_device(args) + + # load data + dataset, output_dim = fedml.data.load(args) + + # load model + model = create_model() + + # start training + fedml_runner = FedMLRunner(args, device, dataset, model) + fedml_runner.run() diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/torch_mpi.py b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/torch_mpi.py new file mode 100644 index 0000000000..16e1939a95 --- /dev/null +++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/torch_mpi.py @@ -0,0 +1,34 @@ +import logging +import fedml +from fedml import FedMLRunner +from fedml.model.cv.resnet import resnet56 + + +def create_model(): + """ + load pretrained model... + please download the pre-trained weight file from + https://github.com/FedML-AI/FedML/blob/fedml_v0.6_before_fundraising/fedml_api/model/cv/pretrained/CIFAR10/resnet56/best.pth + and rename the file to ``resnet56_on_cifar10.pth'' + """ + pre_trained_model_path = "./config/resnet56_on_cifar10.pth" + model = resnet56(10, pretrained=True, path=pre_trained_model_path) + logging.info("load pretrained model successfully") + return model + + +if __name__ == "__main__": + args = fedml.init() + + # init device + device = fedml.device.get_device(args) + + # load data + dataset, output_dim = fedml.data.load(args) + + # load model + model = fedml.model.create(args, output_dim) + + # start training + fedml_runner = FedMLRunner(args, device, dataset, model) + fedml_runner.run() diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/torch_server.py b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/torch_server.py new file mode 100644 index 0000000000..0d507a94cb --- /dev/null +++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/torch_server.py @@ -0,0 +1,31 @@ +import logging + +import fedml +from fedml import FedMLRunner +from fedml.model.cv.resnet import resnet56 + + +def create_model(): + # please download the pre-trained weight file from + # https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar10_resnet44-2a3cabcb.pt + pre_trained_model_path = "./config/resnet56_on_cifar10.pth" + model = resnet56(10, pretrained=True, path=pre_trained_model_path) + logging.info("load pretrained model successfully") + return model + + +if __name__ == "__main__": + args = fedml.init() + + # init device + device = fedml.device.get_device(args) + + # load data + dataset, output_dim = fedml.data.load(args) + + # load model + model = create_model() + + # start training + fedml_runner = FedMLRunner(args, device, dataset, model) + fedml_runner.run() diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/config/foolsgold/fedml_config.yaml b/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/config/foolsgold/fedml_config.yaml index 7d87ad567f..ee707b773a 100644 --- a/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/config/foolsgold/fedml_config.yaml +++ b/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/config/foolsgold/fedml_config.yaml @@ -9,13 +9,13 @@ environment_args: bootstrap: config/bootstrap.sh data_args: - dataset: "cifar10" + dataset: "mnist" data_cache_dir: ~/fedml_data partition_method: "homo" partition_alpha: 0.5 model_args: - model: "resnet56" + model: "lr" model_file_cache_folder: "./model_file_cache" # will be filled by the server automatically global_model_file_path: "./model_file_cache/global_model.pt" @@ -24,8 +24,8 @@ train_args: # for CLI running, this can be None; in MLOps deployment, `client_id_list` will be replaced with real-time selected devices client_id_list: # for FoolsGold Defense, if use_memory is true, then client_num_in_total should be equal to client_number_per_round - client_num_in_total: 8 - client_num_per_round: 8 + client_num_in_total: 4 + client_num_per_round: 4 comm_round: 10 epochs: 1 batch_size: 10 @@ -37,13 +37,13 @@ validation_args: frequency_of_the_test: 1 device_args: - worker_num: 8 - using_gpu: true + worker_num: 4 + using_gpu: false gpu_mapping_file: config/foolsgold/gpu_mapping.yaml gpu_mapping_key: mapping_default comm_args: - backend: "MPI" + backend: "MQTT_S3" tracking_args: # the default log path is at ~/fedml-client/fedml/logs/ and ~/fedml-server/fedml/logs/ diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/config/resnet56_on_cifar10.pth b/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/config/resnet56_on_cifar10.pth new file mode 100644 index 0000000000..d16cb52209 Binary files /dev/null and b/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/config/resnet56_on_cifar10.pth differ diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/torch_client.py b/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/torch_client.py index 0d507a94cb..b4bd2b9db8 100644 --- a/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/torch_client.py +++ b/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/torch_client.py @@ -1,13 +1,16 @@ import logging - import fedml from fedml import FedMLRunner from fedml.model.cv.resnet import resnet56 def create_model(): - # please download the pre-trained weight file from - # https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar10_resnet44-2a3cabcb.pt + """ + loading pretrained model... + please download the pre-trained weight file from + https://github.com/FedML-AI/FedML/blob/fedml_v0.6_before_fundraising/fedml_api/model/cv/pretrained/CIFAR10/resnet56/best.pth + and rename the file to ``resnet56_on_cifar10.pth'' + """ pre_trained_model_path = "./config/resnet56_on_cifar10.pth" model = resnet56(10, pretrained=True, path=pre_trained_model_path) logging.info("load pretrained model successfully") @@ -24,7 +27,7 @@ def create_model(): dataset, output_dim = fedml.data.load(args) # load model - model = create_model() + model = fedml.model.create(args, output_dim) # start training fedml_runner = FedMLRunner(args, device, dataset, model) diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/torch_mpi.py b/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/torch_mpi.py index 0d507a94cb..b4bd2b9db8 100644 --- a/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/torch_mpi.py +++ b/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/torch_mpi.py @@ -1,13 +1,16 @@ import logging - import fedml from fedml import FedMLRunner from fedml.model.cv.resnet import resnet56 def create_model(): - # please download the pre-trained weight file from - # https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar10_resnet44-2a3cabcb.pt + """ + loading pretrained model... + please download the pre-trained weight file from + https://github.com/FedML-AI/FedML/blob/fedml_v0.6_before_fundraising/fedml_api/model/cv/pretrained/CIFAR10/resnet56/best.pth + and rename the file to ``resnet56_on_cifar10.pth'' + """ pre_trained_model_path = "./config/resnet56_on_cifar10.pth" model = resnet56(10, pretrained=True, path=pre_trained_model_path) logging.info("load pretrained model successfully") @@ -24,7 +27,7 @@ def create_model(): dataset, output_dim = fedml.data.load(args) # load model - model = create_model() + model = fedml.model.create(args, output_dim) # start training fedml_runner = FedMLRunner(args, device, dataset, model) diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/torch_server.py b/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/torch_server.py index 0d507a94cb..b4bd2b9db8 100644 --- a/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/torch_server.py +++ b/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/torch_server.py @@ -1,13 +1,16 @@ import logging - import fedml from fedml import FedMLRunner from fedml.model.cv.resnet import resnet56 def create_model(): - # please download the pre-trained weight file from - # https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar10_resnet44-2a3cabcb.pt + """ + loading pretrained model... + please download the pre-trained weight file from + https://github.com/FedML-AI/FedML/blob/fedml_v0.6_before_fundraising/fedml_api/model/cv/pretrained/CIFAR10/resnet56/best.pth + and rename the file to ``resnet56_on_cifar10.pth'' + """ pre_trained_model_path = "./config/resnet56_on_cifar10.pth" model = resnet56(10, pretrained=True, path=pre_trained_model_path) logging.info("load pretrained model successfully") @@ -24,7 +27,7 @@ def create_model(): dataset, output_dim = fedml.data.load(args) # load model - model = create_model() + model = fedml.model.create(args, output_dim) # start training fedml_runner = FedMLRunner(args, device, dataset, model) diff --git a/python/examples/simulation/mpi_torch_async_fedavg/config/gpu_mapping.yaml b/python/examples/simulation/mpi_torch_async_fedavg/config/gpu_mapping.yaml index 1ba657af36..446c75e24a 100644 --- a/python/examples/simulation/mpi_torch_async_fedavg/config/gpu_mapping.yaml +++ b/python/examples/simulation/mpi_torch_async_fedavg/config/gpu_mapping.yaml @@ -1,4 +1,3 @@ -# Please check "GPU_MAPPING.md" to see how to define the topology # You can define a cluster containing multiple GPUs within multiple machines by defining `gpu_mapping.yaml` as follows: # config_cluster0: diff --git a/python/examples/simulation/mpi_torch_fedavg/config/gpu_mapping.yaml b/python/examples/simulation/mpi_torch_fedavg/config/gpu_mapping.yaml index 1ba657af36..446c75e24a 100644 --- a/python/examples/simulation/mpi_torch_fedavg/config/gpu_mapping.yaml +++ b/python/examples/simulation/mpi_torch_fedavg/config/gpu_mapping.yaml @@ -1,4 +1,3 @@ -# Please check "GPU_MAPPING.md" to see how to define the topology # You can define a cluster containing multiple GPUs within multiple machines by defining `gpu_mapping.yaml` as follows: # config_cluster0: diff --git a/python/fedml/__init__.py b/python/fedml/__init__.py index e38e1e2dc9..87fc4aa25b 100644 --- a/python/fedml/__init__.py +++ b/python/fedml/__init__.py @@ -23,7 +23,7 @@ _global_training_type = None _global_comm_backend = None -__version__ = "0.7.327" +__version__ = "0.7.330" def init(args=None): diff --git a/python/fedml/cli/env/collect_env.py b/python/fedml/cli/env/collect_env.py index 19e7295eda..12775413e3 100644 --- a/python/fedml/cli/env/collect_env.py +++ b/python/fedml/cli/env/collect_env.py @@ -64,5 +64,17 @@ def collect_env(): ) ) nvidia_smi.nvmlShutdown() + + import torch + + torch_is_available = torch.cuda.is_available() + print("torch_is_available = {}".format(torch_is_available)) + + device_count = torch.cuda.device_count() + print("device_count = {}".format(device_count)) + + device_name = torch.cuda.get_device_name(0) + print("device_name = {}".format(device_name)) + except: print("No GPU devices") diff --git a/python/fedml/core/alg_frame/client_trainer.py b/python/fedml/core/alg_frame/client_trainer.py index 053636a53d..513b34e67c 100644 --- a/python/fedml/core/alg_frame/client_trainer.py +++ b/python/fedml/core/alg_frame/client_trainer.py @@ -18,11 +18,19 @@ def __init__(self, model, args): self.model = model self.id = 0 self.args = args + self.local_train_dataset = None + self.local_test_dataset = None + self.local_sample_number = 0 FedMLDifferentialPrivacy.get_instance().init(args) def set_id(self, trainer_id): self.id = trainer_id + def update_dataset(self, local_train_dataset, local_test_dataset, local_sample_number): + self.local_train_dataset = local_train_dataset + self.local_test_dataset = local_test_dataset + self.local_sample_number = local_sample_number + @abstractmethod def get_model_params(self): pass diff --git a/python/fedml/core/alg_frame/server_aggregator.py b/python/fedml/core/alg_frame/server_aggregator.py index fa59102efa..1db93002fb 100644 --- a/python/fedml/core/alg_frame/server_aggregator.py +++ b/python/fedml/core/alg_frame/server_aggregator.py @@ -38,7 +38,7 @@ def on_before_aggregation( if FedMLAttacker.get_instance().is_model_attack(): raw_client_model_or_grad_list = FedMLAttacker.get_instance().attack_model( raw_client_grad_list=raw_client_model_or_grad_list, - extra_auxiliary_info=None, + extra_auxiliary_info=self.get_model_params(), ) if FedMLDefender.get_instance().is_defense_enabled(): raw_client_model_or_grad_list = FedMLDefender.get_instance().defend_before_aggregation( diff --git a/python/fedml/core/security/attack/byzantine_attack.py b/python/fedml/core/security/attack/byzantine_attack.py index 3a83cac758..0dd72d1dce 100644 --- a/python/fedml/core/security/attack/byzantine_attack.py +++ b/python/fedml/core/security/attack/byzantine_attack.py @@ -1,4 +1,5 @@ import random +import fedml import numpy as np import torch from .attack_base import BaseAttackMethod @@ -14,25 +15,26 @@ class ByzantineAttack(BaseAttackMethod): def __init__(self, args): self.byzantine_client_num = args.byzantine_client_num self.attack_mode = args.attack_mode # random: randomly generate a weight; zero: set the weight to 0 + self.device = fedml.device.get_device(args) def attack_model(self, raw_client_grad_list: List[Tuple[float, Dict]], extra_auxiliary_info: Any = None): if len(raw_client_grad_list) < self.byzantine_client_num: self.byzantine_client_num = len(raw_client_grad_list) + byzantine_idxs = self._get_malicious_client_idx(len(raw_client_grad_list)) + print(f"byzantine_idxs={byzantine_idxs}") if self.attack_mode == "zero": - byzantine_local_w = self._attack_zero_mode(raw_client_grad_list) + byzantine_local_w = self._attack_zero_mode(raw_client_grad_list, byzantine_idxs) elif self.attack_mode == "random": - byzantine_local_w = self._attack_random_mode(raw_client_grad_list) + byzantine_local_w = self._attack_random_mode(raw_client_grad_list, byzantine_idxs) elif self.attack_mode == "flip": - byzantine_local_w = self._attack_flip_mode(raw_client_grad_list, extra_auxiliary_info) # extra_auxiliary_info: global model + byzantine_local_w = self._attack_flip_mode(raw_client_grad_list, byzantine_idxs, extra_auxiliary_info) # extra_auxiliary_info: global model else: raise NotImplementedError("Method not implemented!") return byzantine_local_w - def _attack_zero_mode(self, model_list): - byzantine_idxs = self._get_malicious_client_idx(len(model_list)) + def _attack_zero_mode(self, model_list, byzantine_idxs): new_model_list = [] - for i in range(0, len(model_list)): if i not in byzantine_idxs: new_model_list.append(model_list[i]) @@ -40,12 +42,11 @@ def _attack_zero_mode(self, model_list): local_sample_number, local_model_params = model_list[i] for k in local_model_params.keys(): if is_weight_param(k): - local_model_params[k] = torch.from_numpy(np.zeros(local_model_params[k].size())).float() + local_model_params[k] = torch.from_numpy(np.zeros(local_model_params[k].size())).float().to(self.device) new_model_list.append((local_sample_number, local_model_params)) return new_model_list - def _attack_random_mode(self, model_list): - byzantine_idxs = self._get_malicious_client_idx(len(model_list)) + def _attack_random_mode(self, model_list, byzantine_idxs): new_model_list = [] for i in range(0, len(model_list)): @@ -55,13 +56,12 @@ def _attack_random_mode(self, model_list): local_sample_number, local_model_params = model_list[i] for k in local_model_params.keys(): if is_weight_param(k): - local_model_params[k] = torch.from_numpy(np.random.random_sample(size=local_model_params[k].size())).float() + local_model_params[k] = torch.from_numpy(np.random.random_sample(size=local_model_params[k].size())).float().to(self.device) new_model_list.append((local_sample_number, local_model_params)) return new_model_list - def _attack_flip_mode(self, model_list, global_model): - byzantine_idxs = self._get_malicious_client_idx(len(model_list)) + def _attack_flip_mode(self, model_list, byzantine_idxs, global_model): new_model_list = [] for i in range(0, len(model_list)): if i not in byzantine_idxs: diff --git a/python/fedml/core/security/constants.py b/python/fedml/core/security/constants.py index e199787223..963806dd34 100644 --- a/python/fedml/core/security/constants.py +++ b/python/fedml/core/security/constants.py @@ -10,6 +10,7 @@ DEFENSE_FOOLSGOLD = "foolsgold" DEFENSE_CRFL = "crfl" DEFENSE_THREESIGMA = "3sigma" +DEFENSE_THREESIGMA_GEOMEDIAN = "3sigma_geo" DEFENSE_MULTIKRUM = "multikrum" DEFENSE_TRIMMED_MEAN = "trimmed_mean" diff --git a/python/fedml/core/security/defense/foolsgold_defense.py b/python/fedml/core/security/defense/foolsgold_defense.py index 6dc109cf98..12c99ac6ea 100644 --- a/python/fedml/core/security/defense/foolsgold_defense.py +++ b/python/fedml/core/security/defense/foolsgold_defense.py @@ -1,8 +1,6 @@ from typing import Callable, List, Tuple, Dict, Any - import numpy as np from scipy import spatial - from .defense_base import BaseDefenseMethod """ @@ -35,7 +33,7 @@ def defend_before_aggregation( ): client_num = len(raw_client_grad_list) importance_feature_list = self._get_importance_feature(raw_client_grad_list) - print(len(importance_feature_list)) + # print(len(importance_feature_list)) if self.memory is None: self.memory = importance_feature_list @@ -78,7 +76,7 @@ def fools_gold_score(feature_vec_list): alpha[alpha <= 0.0] = 1e-15 # Rescale so that max value is alpha - print(np.max(alpha)) + # print(np.max(alpha)) alpha = alpha / np.max(alpha) alpha[(alpha == 1.0)] = 0.999999 @@ -87,8 +85,6 @@ def fools_gold_score(feature_vec_list): alpha[(np.isinf(alpha) + alpha > 1)] = 1 alpha[(alpha < 0)] = 0 - print("alpha = {}".format(alpha)) - return alpha def _get_importance_feature(self, raw_client_grad_list): @@ -100,7 +96,7 @@ def _get_importance_feature(self, raw_client_grad_list): # Get last key-value tuple (weight_name, importance_feature) = list(grads.items())[-2] - print(importance_feature) + # print(importance_feature) feature_len = np.array(importance_feature.cpu().data.detach().numpy().shape).prod() feature_vector = np.reshape(importance_feature.cpu().data.detach().numpy(), feature_len) ret_feature_vector_list.append(feature_vector) diff --git a/python/fedml/core/security/defense/three_sigma_defense.py b/python/fedml/core/security/defense/three_sigma_defense.py index e4dca0b89e..fd05d243f7 100644 --- a/python/fedml/core/security/defense/three_sigma_defense.py +++ b/python/fedml/core/security/defense/three_sigma_defense.py @@ -54,7 +54,7 @@ def __init__(self, config): ): self.pretraining_round_number = config.pretraining_round_num else: - self.pretraining_round_number = 5 + self.pretraining_round_number = 2 # ----------------- params for normal distribution ----------------- # self.mu = 0 self.sigma = 0 diff --git a/python/fedml/core/security/defense/three_sigma_geomedian_defense.py b/python/fedml/core/security/defense/three_sigma_geomedian_defense.py new file mode 100644 index 0000000000..57e85dc3fb --- /dev/null +++ b/python/fedml/core/security/defense/three_sigma_geomedian_defense.py @@ -0,0 +1,193 @@ +import math +import numpy as np +from .defense_base import BaseDefenseMethod +from typing import Callable, List, Tuple, Dict, Any +from scipy import spatial +from ..common.bucket import Bucket +from ..common.utils import compute_geometric_median, compute_euclidean_distance +import torch + + +class ThreeSigmaGeoMedianDefense(BaseDefenseMethod): + def __init__(self, config): + self.memory = None + self.iteration_num = 1 + self.score_list = [] + self.geo_median = None + + if hasattr(config, "bucketing_batch_size") and isinstance(config.bucketing_batch_size, int): + self.bucketing_batch_size = config.bucketing_batch_size + else: + self.bucketing_batch_size = 1 + if hasattr(config, "pretraining_round_num") and isinstance( + config.pretraining_round_num, int + ): + self.pretraining_round_number = config.pretraining_round_num + else: + self.pretraining_round_number = 2 + # ----------------- params for normal distribution ----------------- # + self.mu = 0 + self.sigma = 0 + self.upper_bound = 0 + self.lower_bound = 0 + self.bound_param = 1 # values outside mu +- sigma are outliers + + if hasattr(config, "to_keep_higher_scores") and isinstance(config.to_keep_higher_scores, bool): + self.to_keep_higher_scores = config.to_keep_higher_scores + else: + self.to_keep_higher_scores = False # true or false, depending on the score algo + self.score_function = "l2" + + def run( + self, + raw_client_grad_list: List[Tuple[float, Dict]], + base_aggregation_func: Callable = None, + extra_auxiliary_info: Any = None, + ): + grad_list = self.defend_before_aggregation( + raw_client_grad_list, extra_auxiliary_info + ) + return self.defend_on_aggregation( + grad_list, base_aggregation_func, extra_auxiliary_info + ) + + def defend_before_aggregation( + self, + raw_client_grad_list: List[Tuple[float, Dict]], + extra_auxiliary_info: Any = None, + ): + # grad_list = [grad for (_, grad) in raw_client_grad_list] + client_scores = self.compute_client_scores(raw_client_grad_list) + print(f"client scores = {client_scores}") + if self.iteration_num < self.pretraining_round_number: + self.score_list.extend(list(client_scores)) + self.mu, self.sigma = self.compute_gaussian_distribution() + # if self.mu + self.bound_param * self.sigma >= 0: + self.upper_bound = self.mu + self.bound_param * self.sigma + # if self.mu - self.bound_param * self.sigma <= 0: + self.lower_bound = self.mu - self.bound_param * self.sigma + self.iteration_num += 1 + + for i in range( + len(client_scores) - 1, -1, -1 + ): # traverse the score list in a reversed order + if ( + not self.to_keep_higher_scores and client_scores[i] > self.upper_bound + ) or (self.to_keep_higher_scores and client_scores[i] < self.lower_bound): + # here we do not remove the score in self.score_list to avoid mis-deleting + # due to severe non-iid among clients + raw_client_grad_list.pop(i) + print(f"pop -- i = {i}") + batch_grad_list = Bucket.bucketization( + raw_client_grad_list, self.bucketing_batch_size + ) + return batch_grad_list + + # def defend_on_aggregation( + # self, + # raw_client_grad_list: List[Tuple[float, Dict]], + # base_aggregation_func: Callable = None, + # extra_auxiliary_info: Any = None, + # ): # raw_client_grad_list: batch_grad_list + # # ----------- geometric median part, or just use base_aggregation_func ------------- + # # todo: why geometric median? what about other approaches? + # (num0, avg_params) = raw_client_grad_list[0] + # alphas = {alpha for (alpha, params) in raw_client_grad_list} + # alphas = {alpha / sum(alphas, 0.0) for alpha in alphas} + # for k in avg_params.keys(): + # batch_grads = [params[k] for (alpha, params) in raw_client_grad_list] + # avg_params[k] = compute_geometric_median(alphas, batch_grads) + # return avg_params + + def compute_gaussian_distribution(self): + n = len(self.score_list) + mu = sum(list(self.score_list)) / n + temp = 0 + + for i in range(len(self.score_list)): + temp = (((self.score_list[i] - mu) ** 2) / (n - 1)) + temp + sigma = math.sqrt(temp) + print(f"mu = {mu}, sigma = {sigma}") + return mu, sigma + + def compute_client_scores(self, raw_client_grad_list): + importance_feature_list = self._get_importance_feature(raw_client_grad_list) + if self.score_function == "foolsgold": + if self.memory is None: + self.memory = importance_feature_list + else: # memory: potential bugs: grads in different iterations may be from different clients + for i in range(len(raw_client_grad_list)): + self.memory[i] += importance_feature_list[i] + return self.fools_gold_score(self.memory) + if self.score_function == "l2": + if self.geo_median is None: + # (num0, avg_params) = raw_client_grad_list[0] + # alphas = {alpha for (alpha, params) in raw_client_grad_list} + # alphas = {alpha / sum(alphas, 0.0) for alpha in alphas} + alphas = [1/len(raw_client_grad_list)] * len(raw_client_grad_list) + self.geo_median = compute_geometric_median(alphas, importance_feature_list) + return self.l2_scores(importance_feature_list) + + def l2_scores(self, importance_feature_list): + scores = [] + for feature in importance_feature_list: + score = compute_euclidean_distance(torch.Tensor(feature), self.geo_median) + scores.append(score) + return scores + + + def _get_importance_feature(self, raw_client_grad_list): + # print(f"raw_client_grad_list = {raw_client_grad_list}") + # Foolsgold uses the last layer's gradient/weights as the importance feature. + ret_feature_vector_list = [] + for idx in range(len(raw_client_grad_list)): + raw_grad = raw_client_grad_list[idx] + (p, grads) = raw_grad + + # Get last key-value tuple + (weight_name, importance_feature) = list(grads.items())[-2] + # print(importance_feature) + feature_len = np.array( + importance_feature.cpu().data.detach().numpy().shape + ).prod() + feature_vector = np.reshape( + importance_feature.cpu().data.detach().numpy(), feature_len + ) + ret_feature_vector_list.append(feature_vector) + return ret_feature_vector_list + + @staticmethod + def fools_gold_score(feature_vec_list): + n_clients = len(feature_vec_list) + cs = np.zeros((n_clients, n_clients)) + for i in range(n_clients): + for j in range(n_clients): + cs[i][j] = 1 - spatial.distance.cosine( + feature_vec_list[i], feature_vec_list[j] + ) + cs -= np.eye(n_clients) + maxcs = np.max(cs, axis=1) + # pardoning + for i in range(n_clients): + for j in range(n_clients): + if i == j: + continue + if maxcs[i] < maxcs[j]: + cs[i][j] = cs[i][j] * maxcs[i] / maxcs[j] + alpha = 1 - (np.max(cs, axis=1)) + alpha[alpha > 1.0] = 1.0 + alpha[alpha <= 0.0] = 1e-15 + + # Rescale so that max value is alpha + # print(np.max(alpha)) + alpha = alpha / np.max(alpha) + alpha[(alpha == 1.0)] = 0.999999 + + # Logit function + alpha = np.log(alpha / (1 - alpha)) + 0.5 + # alpha[(np.isinf(alpha) + alpha > 1)] = 1 + # alpha[(alpha < 0)] = 0 + + print("alpha = {}".format(alpha)) + + return alpha \ No newline at end of file diff --git a/python/fedml/core/security/defense/three_sigma_krum_defense.py b/python/fedml/core/security/defense/three_sigma_krum_defense.py new file mode 100644 index 0000000000..ae64eeacb1 --- /dev/null +++ b/python/fedml/core/security/defense/three_sigma_krum_defense.py @@ -0,0 +1,166 @@ +import math +import numpy as np +from .defense_base import BaseDefenseMethod +from typing import Callable, List, Tuple, Dict, Any +from ..common.bucket import Bucket +from ..common.utils import compute_euclidean_distance, compute_middle_point +import torch + + +class ThreeSigmaKrumDefense(BaseDefenseMethod): + def __init__(self, config): + self.memory = None + self.iteration_num = 1 + self.score_list = [] + self.median = None + + if hasattr(config, "bucketing_batch_size") and isinstance( + config.bucketing_batch_size, int + ): + self.bucketing_batch_size = config.bucketing_batch_size + else: + self.bucketing_batch_size = 1 + if hasattr(config, "pretraining_round_num") and isinstance( + config.pretraining_round_num, int + ): + self.pretraining_round_number = config.pretraining_round_num + else: + self.pretraining_round_number = 2 + # ----------------- params for normal distribution ----------------- # + self.mu = 0 + self.sigma = 0 + self.upper_bound = 0 + self.lower_bound = 0 + self.bound_param = 1 # values outside mu +- sigma are outliers + + if hasattr(config, "to_keep_higher_scores") and isinstance( + config.to_keep_higher_scores, bool + ): + self.to_keep_higher_scores = config.to_keep_higher_scores + else: + self.to_keep_higher_scores = ( + False # true or false, depending on the score algo + ) + self.score_function = "l2" + + def run( + self, + raw_client_grad_list: List[Tuple[float, Dict]], + base_aggregation_func: Callable = None, + extra_auxiliary_info: Any = None, + ): + grad_list = self.defend_before_aggregation( + raw_client_grad_list, extra_auxiliary_info + ) + return self.defend_on_aggregation( + grad_list, base_aggregation_func, extra_auxiliary_info + ) + + def defend_before_aggregation( + self, + raw_client_grad_list: List[Tuple[float, Dict]], + extra_auxiliary_info: Any = None, + ): + client_scores = self.compute_client_scores(raw_client_grad_list) + print(f"client scores = {client_scores}") + if self.iteration_num < self.pretraining_round_number: + self.score_list.extend(list(client_scores)) + self.mu, self.sigma = self.compute_gaussian_distribution() + # if self.mu + self.bound_param * self.sigma >= 0: + self.upper_bound = self.mu + self.bound_param * self.sigma + # if self.mu - self.bound_param * self.sigma <= 0: + self.lower_bound = self.mu - self.bound_param * self.sigma + self.iteration_num += 1 + + for i in range( + len(client_scores) - 1, -1, -1 + ): # traverse the score list in a reversed order + if ( + not self.to_keep_higher_scores and client_scores[i] > self.upper_bound + ) or (self.to_keep_higher_scores and client_scores[i] < self.lower_bound): + # here we do not remove the score in self.score_list to avoid mis-deleting + # due to severe non-iid among clients + raw_client_grad_list.pop(i) + print(f"pop -- i = {i}") + batch_grad_list = Bucket.bucketization( + raw_client_grad_list, self.bucketing_batch_size + ) + return batch_grad_list + + def compute_gaussian_distribution(self): + n = len(self.score_list) + mu = sum(list(self.score_list)) / n + temp = 0 + + for i in range(len(self.score_list)): + temp = (((self.score_list[i] - mu) ** 2) / (n - 1)) + temp + sigma = math.sqrt(temp) + print(f"mu = {mu}, sigma = {sigma}") + return mu, sigma + + def compute_client_scores(self, raw_client_grad_list): + importance_feature_list = self._get_importance_feature(raw_client_grad_list) + if self.score_function == "l2": + if self.median is None: + # (num0, avg_params) = raw_client_grad_list[0] + # alphas = {alpha for (alpha, params) in raw_client_grad_list} + # alphas = {alpha / sum(alphas, 0.0) for alpha in alphas} + krum_scores = self._compute_krum_score(importance_feature_list) + + score_index = torch.argsort( + torch.Tensor(krum_scores) + ).tolist() # indices; ascending + score_index = score_index[0 : math.floor(len(raw_client_grad_list) / 2)] + alphas = [1 / len(raw_client_grad_list)] * len(raw_client_grad_list) + honest_importance_feature_list = [ + importance_feature_list[i] for i in score_index + ] + self.median = compute_middle_point( + alphas, honest_importance_feature_list + ) + return self.l2_scores(importance_feature_list) + + def l2_scores(self, importance_feature_list): + scores = [] + for feature in importance_feature_list: + score = compute_euclidean_distance(torch.Tensor(feature), self.median) + scores.append(score) + return scores + + def _compute_krum_score(self, vec_grad_list): + krum_scores = [] + num_client = len(vec_grad_list) + for i in range(0, num_client): + dists = [] + for j in range(0, num_client): + if i != j: + dists.append( + compute_euclidean_distance( + torch.Tensor(vec_grad_list[i]), + torch.Tensor(vec_grad_list[j]), + ).item() + ) + dists.sort() # ascending + score = dists[0 : math.floor(num_client / 2)] + krum_scores.append(sum(score)) + return krum_scores + + def _get_importance_feature(self, raw_client_grad_list): + # print(f"raw_client_grad_list = {raw_client_grad_list}") + # Foolsgold uses the last layer's gradient/weights as the importance feature. + ret_feature_vector_list = [] + for idx in range(len(raw_client_grad_list)): + raw_grad = raw_client_grad_list[idx] + (p, grads) = raw_grad + + # Get last key-value tuple + (weight_name, importance_feature) = list(grads.items())[-2] + # print(importance_feature) + feature_len = np.array( + importance_feature.cpu().data.detach().numpy().shape + ).prod() + feature_vector = np.reshape( + importance_feature.cpu().data.detach().numpy(), feature_len + ) + ret_feature_vector_list.append(feature_vector) + return ret_feature_vector_list diff --git a/python/fedml/core/security/fedml_defender.py b/python/fedml/core/security/fedml_defender.py index b4e2060d3a..8fdaee78ef 100644 --- a/python/fedml/core/security/fedml_defender.py +++ b/python/fedml/core/security/fedml_defender.py @@ -4,6 +4,7 @@ from .defense.coordinate_wise_trimmed_mean_defense import CoordinateWiseTrimmedMeanDefense from .defense.crfl_defense import CRFLDefense from .defense.three_sigma_defense import ThreeSigmaDefense +from .defense.three_sigma_geomedian_defense import ThreeSigmaGeoMedianDefense from ..common.ml_engine_backend import MLEngineBackend from .defense.cclip_defense import CClipDefense from .defense.foolsgold_defense import FoolsGoldDefense @@ -27,6 +28,7 @@ DEFENSE_CRFL, DEFENSE_MULTIKRUM, DEFENSE_TRIMMED_MEAN, + DEFENSE_THREESIGMA_GEOMEDIAN, ) @@ -73,6 +75,8 @@ def init(self, args): self.defender = FoolsGoldDefense(args) elif self.defense_type == DEFENSE_THREESIGMA: self.defender = ThreeSigmaDefense(args) + elif self.defense_type == DEFENSE_THREESIGMA_GEOMEDIAN: + self.defender = ThreeSigmaGeoMedianDefense(args) elif self.defense_type == DEFENSE_CRFL: self.defender = CRFLDefense(args) elif self.defense_type == DEFENSE_TRIMMED_MEAN: @@ -125,6 +129,7 @@ def is_defense_before_aggregation(self): DEFENSE_SLSGD, DEFENSE_FOOLSGOLD, DEFENSE_THREESIGMA, + DEFENSE_THREESIGMA_GEOMEDIAN, DEFENSE_KRUM, DEFENSE_MULTIKRUM, DEFENSE_TRIMMED_MEAN diff --git a/python/fedml/cross_silo/client/fedml_client_master_manager.py b/python/fedml/cross_silo/client/fedml_client_master_manager.py index 69b2625ee5..f39640bfed 100644 --- a/python/fedml/cross_silo/client/fedml_client_master_manager.py +++ b/python/fedml/cross_silo/client/fedml_client_master_manager.py @@ -71,8 +71,8 @@ def handle_message_init(self, msg_params): global_model_params = convert_model_params_to_ddp(global_model_params) self.sync_process_group(0, global_model_params, data_silo_index) - self.trainer_dist_adapter.update_model(global_model_params) self.trainer_dist_adapter.update_dataset(int(data_silo_index)) + self.trainer_dist_adapter.update_model(global_model_params) self.round_idx = 0 self.__train() @@ -86,9 +86,9 @@ def handle_message_receive_model_from_server(self, msg_params): model_params = convert_model_params_to_ddp(model_params) self.sync_process_group(self.round_idx, model_params, client_index) - self.trainer_dist_adapter.update_model(model_params) self.trainer_dist_adapter.update_dataset(int(client_index)) logging.info("current roundx {}, num rounds {}".format(self.round_idx, self.num_rounds)) + self.trainer_dist_adapter.update_model(model_params) if self.round_idx == self.num_rounds: mlops.log_training_finished_status() return diff --git a/python/fedml/cross_silo/client/fedml_trainer.py b/python/fedml/cross_silo/client/fedml_trainer.py index b24125bb66..efc673d5f1 100755 --- a/python/fedml/cross_silo/client/fedml_trainer.py +++ b/python/fedml/cross_silo/client/fedml_trainer.py @@ -35,6 +35,7 @@ def __init__( self.device = device self.args = args + self.args.device = device def update_model(self, weights): self.trainer.set_model_params(weights) @@ -47,6 +48,7 @@ def update_dataset(self, client_index): self.train_local = self.train_data_local_dict[client_index] self.local_sample_number = self.train_data_local_num_dict[client_index] self.test_local = self.test_data_local_dict[client_index] + self.trainer.update_dataset(self.train_local, self.test_local, self.local_sample_number) def train(self, round_idx=None): self.args.round_idx = round_idx diff --git a/python/fedml/cross_silo/lightsecagg/lsa_fedml_client_manager.py b/python/fedml/cross_silo/lightsecagg/lsa_fedml_client_manager.py index 3f5c6f7b32..bb9d1185f4 100644 --- a/python/fedml/cross_silo/lightsecagg/lsa_fedml_client_manager.py +++ b/python/fedml/cross_silo/lightsecagg/lsa_fedml_client_manager.py @@ -94,8 +94,8 @@ def handle_message_init(self, msg_params): self.dimensions, self.total_dimension = model_dimension(global_model_params) - self.trainer.update_model(global_model_params) self.trainer.update_dataset(int(client_index)) + self.trainer.update_model(global_model_params) self.round_idx = 0 self.__offline() @@ -118,8 +118,8 @@ def handle_message_receive_model_from_server(self, msg_params): model_params = msg_params.get(MyMessage.MSG_ARG_KEY_MODEL_PARAMS) client_index = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_INDEX) - self.trainer.update_model(model_params) self.trainer.update_dataset(int(client_index)) + self.trainer.update_model(model_params) if self.round_idx == self.num_rounds - 1: mlops.log_training_finished_status() diff --git a/python/fedml/cross_silo/secagg/sa_fedml_client_manager.py b/python/fedml/cross_silo/secagg/sa_fedml_client_manager.py index ee3acfb5af..a984541a8b 100644 --- a/python/fedml/cross_silo/secagg/sa_fedml_client_manager.py +++ b/python/fedml/cross_silo/secagg/sa_fedml_client_manager.py @@ -95,8 +95,9 @@ def handle_message_init(self, msg_params): self.dimensions, self.total_dimension = model_dimension(global_model_params) - self.trainer.update_model(global_model_params) self.trainer.update_dataset(int(client_index)) + self.trainer.update_model(global_model_params) + self.round_idx = 0 self.__offline() @@ -105,8 +106,8 @@ def handle_message_receive_model_from_server(self, msg_params): model_params = msg_params.get(MyMessage.MSG_ARG_KEY_MODEL_PARAMS) client_index = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_INDEX) - self.trainer.update_model(model_params) self.trainer.update_dataset(int(client_index)) + self.trainer.update_model(model_params) if self.round_idx == self.num_rounds - 1: mlops.log_training_finished_status() diff --git a/python/fedml/cross_silo/server/fedml_aggregator.py b/python/fedml/cross_silo/server/fedml_aggregator.py index f4480b533e..a2f6aa1e81 100644 --- a/python/fedml/cross_silo/server/fedml_aggregator.py +++ b/python/fedml/cross_silo/server/fedml_aggregator.py @@ -4,24 +4,24 @@ import numpy as np import torch - from fedml import mlops + from ...ml.engine import ml_engine_adapter class FedMLAggregator(object): def __init__( - self, - train_global, - test_global, - all_train_data_num, - train_data_local_dict, - test_data_local_dict, - train_data_local_num_dict, - client_num, - device, - args, - server_aggregator, + self, + train_global, + test_global, + all_train_data_num, + train_data_local_dict, + test_data_local_dict, + train_data_local_num_dict, + client_num, + device, + args, + server_aggregator, ): self.aggregator = server_aggregator @@ -37,6 +37,7 @@ def __init__( self.client_num = client_num self.device = device + self.args.device = device logging.info("self.device = {}".format(self.device)) self.model_dict = dict() self.sample_num_dict = dict() @@ -53,7 +54,9 @@ def set_global_model_params(self, model_parameters): def add_local_trained_result(self, index, model_params, sample_num): logging.info("add_model. index = %d" % index) - model_params = ml_engine_adapter.model_params_to_device(self.args, model_params, self.device) + # for dictionary model_params, we let the user level code to control the device + if type(model_params) is not dict: + model_params = ml_engine_adapter.model_params_to_device(self.args, model_params, self.device) self.model_dict[index] = model_params self.sample_num_dict[index] = sample_num @@ -76,7 +79,12 @@ def aggregate(self): model_list.append((self.sample_num_dict[idx], self.model_dict[idx])) model_list = self.aggregator.on_before_aggregation(model_list) averaged_params = self.aggregator.aggregate(model_list) - averaged_params = self.aggregator.on_after_aggregation(averaged_params) + + if type(averaged_params) is dict: + for client_index in range(len(averaged_params)): + averaged_params[client_index] = self.aggregator.on_after_aggregation(averaged_params[client_index]) + else: + averaged_params = self.aggregator.on_after_aggregation(averaged_params) self.set_global_model_params(averaged_params) diff --git a/python/fedml/cross_silo/server/fedml_server_manager.py b/python/fedml/cross_silo/server/fedml_server_manager.py index 8cc799398b..649cbca556 100644 --- a/python/fedml/cross_silo/server/fedml_server_manager.py +++ b/python/fedml/cross_silo/server/fedml_server_manager.py @@ -3,6 +3,7 @@ import time from fedml import mlops + from .message_define import MyMessage from ...core.distributed.communication.message import Message from ...core.distributed.fedml_comm_manager import FedMLCommManager @@ -11,7 +12,7 @@ class FedMLServerManager(FedMLCommManager): def __init__( - self, args, aggregator, comm=None, client_rank=0, client_num=0, backend="MQTT_S3", + self, args, aggregator, comm=None, client_rank=0, client_num=0, backend="MQTT_S3", ): super().__init__(args, comm, client_rank, client_num, backend) self.args = args @@ -44,7 +45,7 @@ def send_init_msg(self): def register_message_receive_handlers(self): logging.info("register_message_receive_handlers------") self.register_message_receive_handler( - MyMessage.MSG_TYPE_CONNECTION_IS_READY, self.handle_messag_connection_ready + MyMessage.MSG_TYPE_CONNECTION_IS_READY, self.handle_message_connection_ready ) self.register_message_receive_handler( @@ -55,7 +56,7 @@ def register_message_receive_handlers(self): MyMessage.MSG_TYPE_C2S_SEND_MODEL_TO_SERVER, self.handle_message_receive_model_from_client, ) - def handle_messag_connection_ready(self, msg_params): + def handle_message_connection_ready(self, msg_params): self.client_id_list_in_this_round = self.aggregator.client_selection( self.args.round_idx, self.client_real_ids, self.args.client_num_per_round ) @@ -116,13 +117,6 @@ def handle_message_receive_model_from_client(self, msg_params): b_all_received = self.aggregator.check_whether_all_receive() logging.info("b_all_received = " + str(b_all_received)) if b_all_received: - # if hasattr(self.args, "using_mlops") and self.args.using_mlops: - # self.mlops_event.log_event_ended( - # "server.wait", event_value=str(self.args.round_idx) - # ) - # self.mlops_event.log_event_started( - # "server.agg_and_eval", event_value=str(self.args.round_idx) - # ) mlops.event("server.wait", event_started=False, event_value=str(self.args.round_idx)) mlops.event( "server.agg_and_eval", event_started=True, event_value=str(self.args.round_idx), @@ -150,9 +144,15 @@ def handle_message_receive_model_from_client(self, msg_params): client_idx_in_this_round = 0 for receiver_id in self.client_id_list_in_this_round: - self.send_message_sync_model_to_client( - receiver_id, global_model_params, self.data_silo_index_list[client_idx_in_this_round], - ) + client_index = self.data_silo_index_list[client_idx_in_this_round] + if type(global_model_params) is dict: + self.send_message_sync_model_to_client( + receiver_id, global_model_params[client_index], client_index, + ) + else: + self.send_message_sync_model_to_client( + receiver_id, global_model_params, client_index, + ) client_idx_in_this_round += 1 self.args.round_idx += 1 @@ -161,10 +161,6 @@ def handle_message_receive_model_from_client(self, msg_params): self.cleanup() else: logging.info("\n\n==========end {}-th round training===========\n".format(self.args.round_idx)) - # if hasattr(self.args, "using_mlops") and self.args.using_mlops: - # self.mlops_event.log_event_started( - # "server.wait", event_value=str(self.args.round_idx) - # ) mlops.event("server.wait", event_started=True, event_value=str(self.args.round_idx)) def cleanup(self): @@ -202,7 +198,7 @@ def send_message_finish(self, receive_id, datasilo_index): def send_message_sync_model_to_client(self, receive_id, global_model_params, client_index): tick = time.time() logging.info("send_message_sync_model_to_client. receive_id = %d" % receive_id) - message = Message(MyMessage.MSG_TYPE_S2C_SYNC_MODEL_TO_CLIENT, self.get_sender_id(), receive_id,) + message = Message(MyMessage.MSG_TYPE_S2C_SYNC_MODEL_TO_CLIENT, self.get_sender_id(), receive_id, ) message.add_params(MyMessage.MSG_ARG_KEY_MODEL_PARAMS, global_model_params) message.add_params(MyMessage.MSG_ARG_KEY_CLIENT_INDEX, str(client_index)) message.add_params(MyMessage.MSG_ARG_KEY_CLIENT_OS, "PythonClient") diff --git a/python/fedml/simulation/mpi/async_fedavg/AsyncFedAVGTrainer.py b/python/fedml/simulation/mpi/async_fedavg/AsyncFedAVGTrainer.py index a1233eafab..1265fb298a 100644 --- a/python/fedml/simulation/mpi/async_fedavg/AsyncFedAVGTrainer.py +++ b/python/fedml/simulation/mpi/async_fedavg/AsyncFedAVGTrainer.py @@ -42,9 +42,6 @@ def train(self, round_idx=None): weights = self.trainer.get_model_params() - # transform Tensor to list - if self.args.is_mobile == 1: - weights = transform_tensor_to_list(weights) return weights, self.local_sample_number def test(self): diff --git a/python/fedml/simulation/mpi/async_fedavg/AsyncFedAvgClientManager.py b/python/fedml/simulation/mpi/async_fedavg/AsyncFedAvgClientManager.py index 936fcfbd33..3e8fb9af07 100644 --- a/python/fedml/simulation/mpi/async_fedavg/AsyncFedAvgClientManager.py +++ b/python/fedml/simulation/mpi/async_fedavg/AsyncFedAvgClientManager.py @@ -42,8 +42,6 @@ def handle_message_init(self, msg_params): global_model_params = msg_params.get(MyMessage.MSG_ARG_KEY_MODEL_PARAMS) client_index = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_INDEX) - if self.args.is_mobile == 1: - global_model_params = transform_list_to_tensor(global_model_params) self.round_idx = 0 self.__train(global_model_params, client_index) @@ -56,9 +54,6 @@ def handle_message_receive_model_from_server(self, msg_params): global_model_params = msg_params.get(MyMessage.MSG_ARG_KEY_MODEL_PARAMS) client_index = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_INDEX) - if self.args.is_mobile == 1: - model_params = transform_list_to_tensor(global_model_params) - self.round_idx += 1 self.__train(global_model_params, client_index) if self.round_idx == self.num_rounds - 1: diff --git a/python/fedml/simulation/mpi/async_fedavg/AsyncFedAvgServerManager.py b/python/fedml/simulation/mpi/async_fedavg/AsyncFedAvgServerManager.py index d6d430e050..2a456df3e5 100644 --- a/python/fedml/simulation/mpi/async_fedavg/AsyncFedAvgServerManager.py +++ b/python/fedml/simulation/mpi/async_fedavg/AsyncFedAvgServerManager.py @@ -47,8 +47,6 @@ def send_init_msg(self): range(self.args.client_num_in_total), num_clients, replace=False ) global_model_params = self.aggregator.get_global_model_params() - if self.args.is_mobile == 1: - global_model_params = transform_tensor_to_list(global_model_params) for process_id in range(1, self.size): self.send_message_init_config( process_id, global_model_params, client_indexes[process_id - 1] @@ -96,13 +94,9 @@ def handle_message_receive_model_from_client(self, msg_params): ) global_model_params = self.aggregator.get_global_model_params() - if self.args.is_mobile == 1: - global_model_params = transform_tensor_to_list(global_model_params) print("indexes of clients: " + str(client_indexes)) print("size = %d" % self.size) - if self.args.is_mobile == 1: - global_model_params = transform_tensor_to_list(global_model_params) self.send_message_sync_model_to_client( sender_id, global_model_params, diff --git a/python/fedml/simulation/mpi/fedavg/FedAVGAggregator.py b/python/fedml/simulation/mpi/fedavg/FedAVGAggregator.py index 7437fb82ce..f3121cc0b7 100644 --- a/python/fedml/simulation/mpi/fedavg/FedAVGAggregator.py +++ b/python/fedml/simulation/mpi/fedavg/FedAVGAggregator.py @@ -69,8 +69,6 @@ def aggregate(self): model_list = [] for idx in range(self.worker_num): - if self.args.is_mobile == 1: - self.model_dict[idx] = transform_list_to_tensor(self.model_dict[idx]) model_list.append((self.sample_num_dict[idx], self.model_dict[idx])) # training_num += self.sample_num_dict[idx] logging.info("len of self.model_dict[idx] = " + str(len(self.model_dict))) diff --git a/python/fedml/simulation/mpi/fedavg/FedAVGAggregator.py.orig b/python/fedml/simulation/mpi/fedavg/FedAVGAggregator.py.orig index d4721e5809..286b28767b 100644 --- a/python/fedml/simulation/mpi/fedavg/FedAVGAggregator.py.orig +++ b/python/fedml/simulation/mpi/fedavg/FedAVGAggregator.py.orig @@ -72,8 +72,6 @@ class FedAVGAggregator(object): training_num = 0 for idx in range(self.worker_num): - if self.args.is_mobile == 1: - self.model_dict[idx] = transform_list_to_tensor(self.model_dict[idx]) model_list.append((self.sample_num_dict[idx], self.model_dict[idx])) # training_num += self.sample_num_dict[idx] logging.info("len of self.model_dict[idx] = " + str(len(self.model_dict))) diff --git a/python/fedml/simulation/mpi/fedavg/FedAVGTrainer.py b/python/fedml/simulation/mpi/fedavg/FedAVGTrainer.py index 296f2736ce..0f039efcb2 100644 --- a/python/fedml/simulation/mpi/fedavg/FedAVGTrainer.py +++ b/python/fedml/simulation/mpi/fedavg/FedAVGTrainer.py @@ -42,9 +42,6 @@ def train(self, round_idx=None): weights = self.trainer.get_model_params() - # transform Tensor to list - if self.args.is_mobile == 1: - weights = transform_tensor_to_list(weights) return weights, self.local_sample_number def test(self): diff --git a/python/fedml/simulation/mpi/fedavg/FedAvgClientManager.py b/python/fedml/simulation/mpi/fedavg/FedAvgClientManager.py index e2e1cb51be..2cc81658b9 100644 --- a/python/fedml/simulation/mpi/fedavg/FedAvgClientManager.py +++ b/python/fedml/simulation/mpi/fedavg/FedAvgClientManager.py @@ -38,9 +38,6 @@ def handle_message_init(self, msg_params): global_model_params = msg_params.get(MyMessage.MSG_ARG_KEY_MODEL_PARAMS) client_index = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_INDEX) - if self.args.is_mobile == 1: - global_model_params = transform_list_to_tensor(global_model_params) - self.trainer.update_model(global_model_params) self.trainer.update_dataset(int(client_index)) self.args.round_idx = 0 @@ -55,9 +52,6 @@ def handle_message_receive_model_from_server(self, msg_params): model_params = msg_params.get(MyMessage.MSG_ARG_KEY_MODEL_PARAMS) client_index = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_INDEX) - if self.args.is_mobile == 1: - model_params = transform_list_to_tensor(model_params) - self.trainer.update_model(model_params) self.trainer.update_dataset(int(client_index)) self.args.round_idx += 1 diff --git a/python/fedml/simulation/mpi/fedavg/FedAvgServerManager.py b/python/fedml/simulation/mpi/fedavg/FedAvgServerManager.py index db58ae4a91..4ba36f43a3 100644 --- a/python/fedml/simulation/mpi/fedavg/FedAvgServerManager.py +++ b/python/fedml/simulation/mpi/fedavg/FedAvgServerManager.py @@ -37,8 +37,6 @@ def send_init_msg(self): self.args.client_num_per_round, ) global_model_params = self.aggregator.get_global_model_params() - if self.args.is_mobile == 1: - global_model_params = transform_tensor_to_list(global_model_params) for process_id in range(1, self.size): self.send_message_init_config( process_id, global_model_params, client_indexes[process_id - 1] @@ -87,8 +85,6 @@ def handle_message_receive_model_from_client(self, msg_params): print("indexes of clients: " + str(client_indexes)) print("size = %d" % self.size) - if self.args.is_mobile == 1: - global_model_params = transform_tensor_to_list(global_model_params) for receiver_id in range(1, self.size): self.send_message_sync_model_to_client( diff --git a/python/fedml/simulation/mpi/fedavg_seq/FedAVGAggregator.py b/python/fedml/simulation/mpi/fedavg_seq/FedAVGAggregator.py index 66831dbe8d..5adb0e0208 100644 --- a/python/fedml/simulation/mpi/fedavg_seq/FedAVGAggregator.py +++ b/python/fedml/simulation/mpi/fedavg_seq/FedAVGAggregator.py @@ -204,9 +204,6 @@ def aggregate(self): training_num = 0 for idx in range(self.worker_num): - if self.args.is_mobile == 1: - self.model_dict[idx] = transform_list_to_tensor(self.model_dict[idx]) - # added for attack & defense; enable multiple defenses # if FedMLDefender.get_instance().is_defense_enabled(): # self.model_dict[idx] = FedMLDefender.get_instance().defend( diff --git a/python/fedml/simulation/mpi/fedavg_seq/FedAVGTrainer.py b/python/fedml/simulation/mpi/fedavg_seq/FedAVGTrainer.py index 94df1412de..148eb9b7c3 100644 --- a/python/fedml/simulation/mpi/fedavg_seq/FedAVGTrainer.py +++ b/python/fedml/simulation/mpi/fedavg_seq/FedAVGTrainer.py @@ -62,9 +62,6 @@ def train(self, round_idx=None): self.trainer.train(self.train_local, self.device, self.args) weights = self.trainer.get_model_params() - # transform Tensor to list - if self.args.is_mobile == 1: - weights = transform_tensor_to_list(weights) return weights, self.local_sample_number def test(self): diff --git a/python/fedml/simulation/mpi/fedavg_seq/FedAvgClientManager.py b/python/fedml/simulation/mpi/fedavg_seq/FedAvgClientManager.py index 89400b8522..7cfaa43056 100644 --- a/python/fedml/simulation/mpi/fedavg_seq/FedAvgClientManager.py +++ b/python/fedml/simulation/mpi/fedavg_seq/FedAvgClientManager.py @@ -35,8 +35,6 @@ def handle_message_init(self, msg_params): client_schedule = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_SCHEDULE) client_indexes = client_schedule[self.worker_id] - if self.args.is_mobile == 1: - global_model_params = transform_list_to_tensor(global_model_params) self.round_idx = 0 self.__train(global_model_params, client_indexes, average_weight_dict) @@ -53,9 +51,6 @@ def handle_message_receive_model_from_server(self, msg_params): client_schedule = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_SCHEDULE) client_indexes = client_schedule[self.worker_id] - if self.args.is_mobile == 1: - model_params = transform_list_to_tensor(global_model_params) - self.round_idx += 1 self.__train(global_model_params, client_indexes, average_weight_dict) if self.round_idx == self.num_rounds - 1: diff --git a/python/fedml/simulation/mpi/fedavg_seq/FedAvgServerManager.py b/python/fedml/simulation/mpi/fedavg_seq/FedAvgServerManager.py index 269023babd..f9269a50b9 100644 --- a/python/fedml/simulation/mpi/fedavg_seq/FedAvgServerManager.py +++ b/python/fedml/simulation/mpi/fedavg_seq/FedAvgServerManager.py @@ -44,8 +44,6 @@ def send_init_msg(self): global_model_params = self.aggregator.get_global_model_params() - if self.args.is_mobile == 1: - global_model_params = transform_tensor_to_list(global_model_params) for process_id in range(1, self.size): self.send_message_init_config(process_id, global_model_params, average_weight_dict, client_schedule) @@ -104,13 +102,9 @@ def handle_message_receive_model_from_client(self, msg_params): average_weight_dict = self.aggregator.get_average_weight(client_indexes) global_model_params = self.aggregator.get_global_model_params() - if self.args.is_mobile == 1: - global_model_params = transform_tensor_to_list(global_model_params) print("indexes of clients: " + str(client_indexes)) print("size = %d" % self.size) - if self.args.is_mobile == 1: - global_model_params = transform_tensor_to_list(global_model_params) for receiver_id in range(1, self.size): self.send_message_sync_model_to_client( diff --git a/python/fedml/simulation/mpi/fedgan/FedGANAggregator.py b/python/fedml/simulation/mpi/fedgan/FedGANAggregator.py index f297882b85..826b2da7ec 100644 --- a/python/fedml/simulation/mpi/fedgan/FedGANAggregator.py +++ b/python/fedml/simulation/mpi/fedgan/FedGANAggregator.py @@ -72,8 +72,6 @@ def aggregate(self): training_num = 0 for idx in range(self.worker_num): - if self.args.is_mobile == 1: - self.model_dict[idx] = transform_list_to_tensor(self.model_dict[idx]) model_list.append((self.sample_num_dict[idx], self.model_dict[idx])) training_num += self.sample_num_dict[idx] diff --git a/python/fedml/simulation/mpi/fedgan/FedGANTrainer.py b/python/fedml/simulation/mpi/fedgan/FedGANTrainer.py index fe792b40ec..d9caae6958 100644 --- a/python/fedml/simulation/mpi/fedgan/FedGANTrainer.py +++ b/python/fedml/simulation/mpi/fedgan/FedGANTrainer.py @@ -41,9 +41,6 @@ def train(self, round_idx=None): self.trainer.train(self.train_local, self.device, self.args) weights = self.trainer.get_model_params() - # transform Tensor to list - if self.args.is_mobile == 1: - weights = transform_tensor_to_list(weights) return weights, self.local_sample_number def test(self): diff --git a/python/fedml/simulation/mpi/fedgan/FedGanClientManager.py b/python/fedml/simulation/mpi/fedgan/FedGanClientManager.py index 24875df4ef..df8dcc55bd 100644 --- a/python/fedml/simulation/mpi/fedgan/FedGanClientManager.py +++ b/python/fedml/simulation/mpi/fedgan/FedGanClientManager.py @@ -29,9 +29,6 @@ def handle_message_init(self, msg_params): global_model_params = msg_params.get(MyMessage.MSG_ARG_KEY_MODEL_PARAMS) client_index = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_INDEX) - if self.args.is_mobile == 1: - global_model_params = transform_list_to_tensor(global_model_params) - self.trainer.update_model(global_model_params) self.trainer.update_dataset(int(client_index)) self.args.round_idx = 0 @@ -46,9 +43,6 @@ def handle_message_receive_model_from_server(self, msg_params): model_params = msg_params.get(MyMessage.MSG_ARG_KEY_MODEL_PARAMS) client_index = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_INDEX) - if self.args.is_mobile == 1: - model_params = transform_list_to_tensor(model_params) - self.trainer.update_model(model_params) self.trainer.update_dataset(int(client_index)) self.args.round_idx += 1 diff --git a/python/fedml/simulation/mpi/fedgan/FedGanServerManager.py b/python/fedml/simulation/mpi/fedgan/FedGanServerManager.py index 11325b7df0..15b8dc7390 100644 --- a/python/fedml/simulation/mpi/fedgan/FedGanServerManager.py +++ b/python/fedml/simulation/mpi/fedgan/FedGanServerManager.py @@ -37,8 +37,6 @@ def send_init_msg(self): self.args.client_num_per_round, ) global_model_params = self.aggregator.get_global_model_params() - if self.args.is_mobile == 1: - global_model_params = transform_tensor_to_list(global_model_params) for process_id in range(1, self.size): self.send_message_init_config( process_id, global_model_params, client_indexes[process_id - 1] @@ -87,8 +85,6 @@ def handle_message_receive_model_from_client(self, msg_params): print("indexes of clients: " + str(client_indexes)) print("size = %d" % self.size) - if self.args.is_mobile == 1: - global_model_params = transform_tensor_to_list(global_model_params) for receiver_id in range(1, self.size): self.send_message_sync_model_to_client( diff --git a/python/fedml/simulation/mpi/fednova/FedNovaAggregator.py b/python/fedml/simulation/mpi/fednova/FedNovaAggregator.py index 9940cdfbb2..71fb4743c0 100644 --- a/python/fedml/simulation/mpi/fednova/FedNovaAggregator.py +++ b/python/fedml/simulation/mpi/fednova/FedNovaAggregator.py @@ -172,9 +172,6 @@ def aggregate(self): training_num = 0 for idx in range(self.worker_num): - if self.args.is_mobile == 1: - self.result_dict[idx] = transform_list_to_tensor(self.result_dict[idx]) - if len(self.result_dict[idx]) > 0: # some workers may not have parameters # for client_index, client_result in self.result_dict[idx].items(): diff --git a/python/fedml/simulation/mpi/fednova/FedNovaClientManager.py b/python/fedml/simulation/mpi/fednova/FedNovaClientManager.py index d4160d578a..ad2a99b2ed 100644 --- a/python/fedml/simulation/mpi/fednova/FedNovaClientManager.py +++ b/python/fedml/simulation/mpi/fednova/FedNovaClientManager.py @@ -44,8 +44,6 @@ def handle_message_init(self, msg_params): client_schedule = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_SCHEDULE) client_indexes = client_schedule[self.worker_id] - if self.args.is_mobile == 1: - global_model_params = transform_list_to_tensor(global_model_params) self.round_idx = 0 self.__train(global_model_params, client_indexes, average_weight_dict) @@ -62,9 +60,6 @@ def handle_message_receive_model_from_server(self, msg_params): client_schedule = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_SCHEDULE) client_indexes = client_schedule[self.worker_id] - if self.args.is_mobile == 1: - model_params = transform_list_to_tensor(global_model_params) - self.round_idx += 1 self.__train(global_model_params, client_indexes, average_weight_dict) if self.round_idx == self.num_rounds - 1: diff --git a/python/fedml/simulation/mpi/fednova/FedNovaServerManager.py b/python/fedml/simulation/mpi/fednova/FedNovaServerManager.py index 44c130cd5f..97d257dbe2 100644 --- a/python/fedml/simulation/mpi/fednova/FedNovaServerManager.py +++ b/python/fedml/simulation/mpi/fednova/FedNovaServerManager.py @@ -46,8 +46,6 @@ def send_init_msg(self): global_model_params = self.aggregator.get_global_model_params() - if self.args.is_mobile == 1: - global_model_params = transform_tensor_to_list(global_model_params) for process_id in range(1, self.size): self.send_message_init_config( process_id, global_model_params, @@ -104,13 +102,9 @@ def handle_message_receive_model_from_client(self, msg_params): average_weight_dict = self.aggregator.get_average_weight(client_indexes) global_model_params = self.aggregator.get_global_model_params() - if self.args.is_mobile == 1: - global_model_params = transform_tensor_to_list(global_model_params) print("indexes of clients: " + str(client_indexes)) print("size = %d" % self.size) - if self.args.is_mobile == 1: - global_model_params = transform_tensor_to_list(global_model_params) for receiver_id in range(1, self.size): self.send_message_sync_model_to_client( diff --git a/python/fedml/simulation/mpi/fednova/FedNovaTrainer.py b/python/fedml/simulation/mpi/fednova/FedNovaTrainer.py index 9248b82338..d55e6d9822 100644 --- a/python/fedml/simulation/mpi/fednova/FedNovaTrainer.py +++ b/python/fedml/simulation/mpi/fednova/FedNovaTrainer.py @@ -64,9 +64,6 @@ def train(self, round_idx=None): ratio=self.local_sample_number / self.total_train_num) # weights = self.trainer.get_model_params() - # transform Tensor to list - if self.args.is_mobile == 1: - weights = transform_tensor_to_list(weights) # return weights, self.local_sample_number return avg_loss, norm_grad, tau_eff diff --git a/python/fedml/simulation/mpi/fedopt/FedOptAggregator.py b/python/fedml/simulation/mpi/fedopt/FedOptAggregator.py index 00d9712a8e..7000760666 100644 --- a/python/fedml/simulation/mpi/fedopt/FedOptAggregator.py +++ b/python/fedml/simulation/mpi/fedopt/FedOptAggregator.py @@ -83,8 +83,6 @@ def aggregate(self): training_num = 0 for idx in range(self.worker_num): - if self.args.is_mobile == 1: - self.model_dict[idx] = transform_list_to_tensor(self.model_dict[idx]) model_list.append((self.sample_num_dict[idx], self.model_dict[idx])) training_num += self.sample_num_dict[idx] diff --git a/python/fedml/simulation/mpi/fedopt/FedOptClientManager.py b/python/fedml/simulation/mpi/fedopt/FedOptClientManager.py index 527487aa55..63222972ea 100644 --- a/python/fedml/simulation/mpi/fedopt/FedOptClientManager.py +++ b/python/fedml/simulation/mpi/fedopt/FedOptClientManager.py @@ -29,9 +29,6 @@ def handle_message_init(self, msg_params): global_model_params = msg_params.get(MyMessage.MSG_ARG_KEY_MODEL_PARAMS) client_index = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_INDEX) - if self.args.is_mobile == 1: - global_model_params = transform_list_to_tensor(global_model_params) - self.trainer.update_model(global_model_params) self.trainer.update_dataset(int(client_index)) self.args.round_idx = 0 @@ -46,9 +43,6 @@ def handle_message_receive_model_from_server(self, msg_params): model_params = msg_params.get(MyMessage.MSG_ARG_KEY_MODEL_PARAMS) client_index = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_INDEX) - if self.args.is_mobile == 1: - model_params = transform_list_to_tensor(model_params) - self.trainer.update_model(model_params) self.trainer.update_dataset(int(client_index)) self.args.round_idx += 1 diff --git a/python/fedml/simulation/mpi/fedopt/FedOptServerManager.py b/python/fedml/simulation/mpi/fedopt/FedOptServerManager.py index 73a387f12f..febbb4ac39 100644 --- a/python/fedml/simulation/mpi/fedopt/FedOptServerManager.py +++ b/python/fedml/simulation/mpi/fedopt/FedOptServerManager.py @@ -85,9 +85,6 @@ def handle_message_receive_model_from_client(self, msg_params): ) print("size = %d" % self.size) - if self.args.is_mobile == 1: - print("transform_tensor_to_list") - global_model_params = transform_tensor_to_list(global_model_params) for receiver_id in range(1, self.size): self.send_message_sync_model_to_client( diff --git a/python/fedml/simulation/mpi/fedopt/FedOptTrainer.py b/python/fedml/simulation/mpi/fedopt/FedOptTrainer.py index 8680f55369..00661f35b0 100644 --- a/python/fedml/simulation/mpi/fedopt/FedOptTrainer.py +++ b/python/fedml/simulation/mpi/fedopt/FedOptTrainer.py @@ -38,7 +38,4 @@ def train(self, round_idx=None): weights = self.trainer.get_model_params() - # transform Tensor to list - if self.args.is_mobile == 1: - weights = transform_tensor_to_list(weights) return weights, self.local_sample_number diff --git a/python/fedml/simulation/mpi/fedopt_seq/FedOptAggregator.py b/python/fedml/simulation/mpi/fedopt_seq/FedOptAggregator.py index b2de3c1e72..dc91017c3b 100644 --- a/python/fedml/simulation/mpi/fedopt_seq/FedOptAggregator.py +++ b/python/fedml/simulation/mpi/fedopt_seq/FedOptAggregator.py @@ -213,8 +213,6 @@ def aggregate(self): training_num = 0 for idx in range(self.worker_num): - if self.args.is_mobile == 1: - self.model_dict[idx] = transform_list_to_tensor(self.model_dict[idx]) if len(self.model_dict[idx]) > 0: # some workers may not have parameters model_list.append(self.model_dict[idx]) diff --git a/python/fedml/simulation/mpi/fedopt_seq/FedOptClientManager.py b/python/fedml/simulation/mpi/fedopt_seq/FedOptClientManager.py index 484306dade..3ec4cdf370 100644 --- a/python/fedml/simulation/mpi/fedopt_seq/FedOptClientManager.py +++ b/python/fedml/simulation/mpi/fedopt_seq/FedOptClientManager.py @@ -35,9 +35,6 @@ def handle_message_init(self, msg_params): client_schedule = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_SCHEDULE) client_indexes = client_schedule[self.worker_id] - if self.args.is_mobile == 1: - global_model_params = transform_list_to_tensor(global_model_params) - self.round_idx = 0 self.__train(global_model_params, client_indexes, average_weight_dict) @@ -53,9 +50,6 @@ def handle_message_receive_model_from_server(self, msg_params): client_schedule = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_SCHEDULE) client_indexes = client_schedule[self.worker_id] - if self.args.is_mobile == 1: - model_params = transform_list_to_tensor(global_model_params) - self.round_idx += 1 self.__train(global_model_params, client_indexes, average_weight_dict) if self.round_idx == self.num_rounds - 1: diff --git a/python/fedml/simulation/mpi/fedopt_seq/FedOptServerManager.py b/python/fedml/simulation/mpi/fedopt_seq/FedOptServerManager.py index 98cfbf856d..207fcf37ed 100644 --- a/python/fedml/simulation/mpi/fedopt_seq/FedOptServerManager.py +++ b/python/fedml/simulation/mpi/fedopt_seq/FedOptServerManager.py @@ -97,9 +97,6 @@ def handle_message_receive_model_from_client(self, msg_params): global_model_params = self.aggregator.get_global_model_params() print("size = %d" % self.size) - if self.args.is_mobile == 1: - print("transform_tensor_to_list") - global_model_params = transform_tensor_to_list(global_model_params) for receiver_id in range(1, self.size): self.send_message_sync_model_to_client( diff --git a/python/fedml/simulation/mpi/fedopt_seq/FedOptTrainer.py b/python/fedml/simulation/mpi/fedopt_seq/FedOptTrainer.py index 8680f55369..00661f35b0 100644 --- a/python/fedml/simulation/mpi/fedopt_seq/FedOptTrainer.py +++ b/python/fedml/simulation/mpi/fedopt_seq/FedOptTrainer.py @@ -38,7 +38,4 @@ def train(self, round_idx=None): weights = self.trainer.get_model_params() - # transform Tensor to list - if self.args.is_mobile == 1: - weights = transform_tensor_to_list(weights) return weights, self.local_sample_number diff --git a/python/fedml/simulation/mpi/fedprox/FedProxAggregator.py b/python/fedml/simulation/mpi/fedprox/FedProxAggregator.py index 63c768b47e..026729e264 100644 --- a/python/fedml/simulation/mpi/fedprox/FedProxAggregator.py +++ b/python/fedml/simulation/mpi/fedprox/FedProxAggregator.py @@ -69,8 +69,6 @@ def aggregate(self): training_num = 0 for idx in range(self.worker_num): - if self.args.is_mobile == 1: - self.model_dict[idx] = transform_list_to_tensor(self.model_dict[idx]) model_list.append((self.sample_num_dict[idx], self.model_dict[idx])) training_num += self.sample_num_dict[idx] diff --git a/python/fedml/simulation/mpi/fedprox/FedProxClientManager.py b/python/fedml/simulation/mpi/fedprox/FedProxClientManager.py index 0375a0875b..860fe336b0 100644 --- a/python/fedml/simulation/mpi/fedprox/FedProxClientManager.py +++ b/python/fedml/simulation/mpi/fedprox/FedProxClientManager.py @@ -29,7 +29,6 @@ def handle_message_init(self, msg_params): global_model_params = msg_params.get(MyMessage.MSG_ARG_KEY_MODEL_PARAMS) client_index = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_INDEX) - # if self.args.is_mobile == 1: global_model_params = transform_list_to_tensor(global_model_params) self.trainer.update_model(global_model_params) @@ -48,9 +47,6 @@ def handle_message_receive_model_from_server(self, msg_params): model_params = transform_list_to_tensor(model_params) - # if self.args.is_mobile == 1: - # model_params = transform_list_to_tensor(model_params) - self.trainer.update_model(model_params) self.trainer.update_dataset(int(client_index)) self.args.round_idx += 1 diff --git a/python/fedml/simulation/mpi/fedprox/FedProxServerManager.py b/python/fedml/simulation/mpi/fedprox/FedProxServerManager.py index ed903f5d5a..ccf9f087cf 100644 --- a/python/fedml/simulation/mpi/fedprox/FedProxServerManager.py +++ b/python/fedml/simulation/mpi/fedprox/FedProxServerManager.py @@ -86,9 +86,6 @@ def handle_message_receive_model_from_client(self, msg_params): print("indexes of clients: " + str(client_indexes)) print("size = %d" % self.size) - if self.args.is_mobile == 1: - print("transform_tensor_to_list") - global_model_params = transform_tensor_to_list(global_model_params) for receiver_id in range(1, self.size): self.send_message_sync_model_to_client( diff --git a/python/fedml/simulation/mpi/fedprox/FedProxTrainer.py b/python/fedml/simulation/mpi/fedprox/FedProxTrainer.py index 8ddc4138e8..e77096b452 100644 --- a/python/fedml/simulation/mpi/fedprox/FedProxTrainer.py +++ b/python/fedml/simulation/mpi/fedprox/FedProxTrainer.py @@ -45,9 +45,6 @@ def train(self, round_idx=None): weights = self.trainer.get_model_params() - # transform Tensor to list - if self.args.is_mobile == 1: - weights = transform_tensor_to_list(weights) return weights, self.local_sample_number def test(self): diff --git a/python/fedml/simulation/mpi/fedseg/FedSegAggregator.py b/python/fedml/simulation/mpi/fedseg/FedSegAggregator.py index 435efcdf79..bb126cae8e 100644 --- a/python/fedml/simulation/mpi/fedseg/FedSegAggregator.py +++ b/python/fedml/simulation/mpi/fedseg/FedSegAggregator.py @@ -68,8 +68,6 @@ def aggregate(self): training_num = 0 for idx in range(self.worker_num): - if self.args.is_mobile == 1: - self.model_dict[idx] = transform_list_to_tensor(self.model_dict[idx]) model_list.append((self.sample_num_dict[idx], self.model_dict[idx])) training_num += self.sample_num_dict[idx] diff --git a/python/fedml/simulation/mpi/fedseg/FedSegClientManager.py b/python/fedml/simulation/mpi/fedseg/FedSegClientManager.py index d4635365d5..de6ce9f19b 100644 --- a/python/fedml/simulation/mpi/fedseg/FedSegClientManager.py +++ b/python/fedml/simulation/mpi/fedseg/FedSegClientManager.py @@ -33,10 +33,6 @@ def handle_message_init(self, msg_params): client_index ) ) - - if self.args.is_mobile == 1: - global_model_params = transform_list_to_tensor(global_model_params) - self.trainer.update_model(global_model_params) self.trainer.update_dataset(int(client_index)) self.args.round_idx = 0 @@ -51,9 +47,6 @@ def handle_message_receive_model_from_server(self, msg_params): model_params = msg_params.get(MyMessage.MSG_ARG_KEY_MODEL_PARAMS) client_index = msg_params.get(MyMessage.MSG_ARG_KEY_CLIENT_INDEX) - if self.args.is_mobile == 1: - model_params = transform_list_to_tensor(model_params) - self.trainer.update_model(model_params) self.trainer.update_dataset(int(client_index)) self.args.round_idx += 1 diff --git a/python/fedml/simulation/mpi/fedseg/FedSegServerManager.py b/python/fedml/simulation/mpi/fedseg/FedSegServerManager.py index eb5c55163b..d1382c6d77 100644 --- a/python/fedml/simulation/mpi/fedseg/FedSegServerManager.py +++ b/python/fedml/simulation/mpi/fedseg/FedSegServerManager.py @@ -76,10 +76,6 @@ def handle_message_receive_model_from_client(self, msg_params): self.args.client_num_per_round, ) - if self.args.is_mobile == 1: - - global_model_params = transform_tensor_to_list(global_model_params) - for receiver_id in range(1, self.size): self.send_message_sync_model_to_client( receiver_id, global_model_params, client_indexes[receiver_id - 1] diff --git a/python/fedml/simulation/mpi/fedseg/FedSegTrainer.py b/python/fedml/simulation/mpi/fedseg/FedSegTrainer.py index b05b5c9f32..f0bda08b47 100644 --- a/python/fedml/simulation/mpi/fedseg/FedSegTrainer.py +++ b/python/fedml/simulation/mpi/fedseg/FedSegTrainer.py @@ -43,9 +43,6 @@ def train(self): self.trainer.train(self.train_local, self.device) weights = self.trainer.get_model_params() - # transform Tensor to list - if self.args.is_mobile == 1: - weights = transform_tensor_to_list(weights) return weights, self.local_sample_number def test(self): diff --git a/python/setup.py b/python/setup.py index 36a5e2ab98..da052dd3f2 100755 --- a/python/setup.py +++ b/python/setup.py @@ -73,7 +73,7 @@ def finalize_options(self): setup( name="fedml", - version="0.7.327", + version="0.7.330", author="FedML Team", author_email="ch@fedml.ai", description="A research and production integrated edge-cloud library for "