Skip to content

Commit

Permalink
Merge pull request #566 from FedML-AI/dev/v0.7.0
Browse files Browse the repository at this point in the history
Dev/v0.7.0
  • Loading branch information
chaoyanghe committed Sep 11, 2022
2 parents e2e859c + b14723c commit d5221b9
Show file tree
Hide file tree
Showing 81 changed files with 480 additions and 758 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ on:

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
cross-silo-dp-test:
cross-silo-cdp-test:
defaults:
run:
shell: bash
Expand Down Expand Up @@ -46,23 +46,23 @@ jobs:
- name: server - cross-silo - cdp
run: |
cd examples/cross_silo/mqtt_s3_fedavg_central_dp_mnist_lr_example
cd examples/cross_silo/mqtt_s3_fedavg_cdp_mnist_lr_example
run_id=cross-silo-ho-${{ format('{0}{1}{2}{3}', github.run_id, matrix.os, matrix.arch, matrix.python-version) }}
echo ${run_id}
bash run_server.sh $run_id
if: ${{ matrix.client-index == '0' }}

- name: client 1 - cross-silo - cdp
run: |
cd examples/cross_silo/mqtt_s3_fedavg_central_dp_mnist_lr_example
cd examples/cross_silo/mqtt_s3_fedavg_cdp_mnist_lr_example
run_id=cross-silo-ho-${{ format('{0}{1}{2}{3}', github.run_id, matrix.os, matrix.arch, matrix.python-version) }}
echo ${run_id}
bash run_client.sh 1 $run_id
if: ${{ matrix.client-index == '1' }}

- name: client 2 - cross-silo - cdp
run: |
cd examples/cross_silo/mqtt_s3_fedavg_central_dp_mnist_lr_example
cd examples/cross_silo/mqtt_s3_fedavg_cdp_mnist_lr_example
run_id=cross-silo-ho-${{ format('{0}{1}{2}{3}', github.run_id, matrix.os, matrix.arch, matrix.python-version) }}
echo ${run_id}
bash run_client.sh 2 $run_id
Expand Down
69 changes: 69 additions & 0 deletions .github/workflows/smoke_test_cross_silo_fedavg_ldp_linux.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Smoke test for the cross-silo FedAvg + LDP MNIST logistic-regression example.
# The matrix expands to three jobs, selected by `client-index`:
#   '0' runs the server, '1' runs client 1, '2' runs client 2.

name: LDP-Linux

# Controls when the workflow will run
on:
  # Scheduled run, once a day at 12:12 UTC (GitHub evaluates cron in UTC).
  schedule:
    - cron: "12 12 */1 * *"
  # Pull requests whose base branch is one of the branches listed below.
  pull_request:
    # `dev/v0.7.0` is the development branch name (it was previously spelled
    # `dev/0.7.0` here, which does not match that branch).
    branches: [master, test/v0.7.0, dev/v0.7.0]

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  cross-silo-ldp-test:
    defaults:
      run:
        shell: bash
        # Every `run` step starts in the repository's `python` directory.
        working-directory: python
    strategy:
      # Keep the remaining matrix jobs running when one of them fails.
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        arch: [X64]
        python-version: ['3.8']
        client-index: ['0', '1', '2']
        # exclude:
        #   - os: macos-latest
        #     python-version: '3.8'
        #   - os: windows-latest
        #     python-version: '3.6'
    runs-on: [self-hosted, runner-linux, devops]
    timeout-minutes: 15
    steps:
      - uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - uses: actions/checkout@v3
      - name: pip install -e ./
        run: |
          pip install -e ./

      # The run_id below is built from github.run_id, os, arch and
      # python-version only (not client-index), so the server job and both
      # client jobs of one workflow run compute the same run_id.
      - name: server - cross-silo - ldp
        run: |
          cd examples/cross_silo/mqtt_s3_fedavg_ldp_mnist_lr_example
          run_id=cross-silo-ho-${{ format('{0}{1}{2}{3}', github.run_id, matrix.os, matrix.arch, matrix.python-version) }}
          echo ${run_id}
          bash run_server.sh $run_id
        if: ${{ matrix.client-index == '0' }}

      - name: client 1 - cross-silo - ldp
        run: |
          cd examples/cross_silo/mqtt_s3_fedavg_ldp_mnist_lr_example
          run_id=cross-silo-ho-${{ format('{0}{1}{2}{3}', github.run_id, matrix.os, matrix.arch, matrix.python-version) }}
          echo ${run_id}
          bash run_client.sh 1 $run_id
        if: ${{ matrix.client-index == '1' }}

      - name: client 2 - cross-silo - ldp
        run: |
          cd examples/cross_silo/mqtt_s3_fedavg_ldp_mnist_lr_example
          run_id=cross-silo-ho-${{ format('{0}{1}{2}{3}', github.run_id, matrix.os, matrix.arch, matrix.python-version) }}
          echo ${run_id}
          bash run_client.sh 2 $run_id
        if: ${{ matrix.client-index == '2' }}
3 changes: 0 additions & 3 deletions .github/workflows/smoke_test_ml_engines_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,6 @@ jobs:
- name: server - mxnet - fedavg
run: |
sudo apt-get install libquadmath0
cd examples/cross_silo/mxnet_mqtt_s3_fedavg_mnist_lr_example
run_id=mxnet-ml-engine-${{ format('{0}{1}{2}{3}', github.run_id, matrix.os, matrix.arch, matrix.python-version) }}
echo ${run_id}
Expand All @@ -162,7 +161,6 @@ jobs:

- name: client 1 - mxnet - fedavg
run: |
sudo apt-get install libquadmath0
cd examples/cross_silo/mxnet_mqtt_s3_fedavg_mnist_lr_example
run_id=mxnet-ml-engine-${{ format('{0}{1}{2}{3}', github.run_id, matrix.os, matrix.arch, matrix.python-version) }}
echo ${run_id}
Expand All @@ -171,7 +169,6 @@ jobs:

- name: client 2 - mxnet - fedavg
run: |
sudo apt-get install libquadmath0
cd examples/cross_silo/mxnet_mqtt_s3_fedavg_mnist_lr_example
run_id=mxnet-ml-engine-${{ format('{0}{1}{2}{3}', github.run_id, matrix.os, matrix.arch, matrix.python-version) }}
echo ${run_id}
Expand Down
2 changes: 1 addition & 1 deletion python/examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
| mxnet_mqtt_s3_fedavg_mnist_lr_example | Octopus (cross-silo) | FedAvg | MNIST | Logistic Regression | MQTT_S3 | mxnet | [Link](cross_silo/mxnet_mqtt_s3_fedavg_mnist_lr_example) | [Link](cross_silo/mxnet_mqtt_s3_fedavg_mnist_lr_example/README.md) |
| mqtt_s3_fedavg_attack_mnist_lr_example | Octopus (cross-silo) | FedAvg | MNIST | Logistic Regression | MQTT_S3 | pytorch | [Link](cross_silo/mqtt_s3_fedavg_attack_mnist_lr_example) | [Link](cross_silo/mqtt_s3_fedavg_attack_mnist_lr_example/README.md) |
| mqtt_s3_fedavg_defense_mnist_lr_example | Octopus (cross-silo) | FedAvg | MNIST | Logistic Regression | MQTT_S3 | pytorch | [Link](cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example) | [Link](cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/README.md) |
| mqtt_s3_fedavg_central_dp_mnist_lr_example | Octopus (cross-silo) | FedAvg | MNIST | Logistic Regression | MQTT_S3 | pytorch | [Link](cross_silo/mqtt_s3_fedavg_central_dp_mnist_lr_example) | [Link](cross_silo/mqtt_s3_fedavg_central_dp_mnist_lr_example/README.md) |
| mqtt_s3_fedavg_cdp_mnist_lr_example | Octopus (cross-silo) | FedAvg | MNIST | Logistic Regression | MQTT_S3 | pytorch | [Link](cross_silo/mqtt_s3_fedavg_cdp_mnist_lr_example) | [Link](cross_silo/mqtt_s3_fedavg_cdp_mnist_lr_example/README.md) |
| mqtt_s3_fedavg_ldp_mnist_lr_example | Octopus (cross-silo) | FedAvg | MNIST | Logistic Regression | MQTT_S3 | pytorch | [Link](cross_silo/mqtt_s3_fedavg_ldp_mnist_lr_example) | [Link](cross_silo/mqtt_s3_fedavg_ldp_mnist_lr_example/README.md) |


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ train_args:
client_id_list:
client_num_in_total: 1000
client_num_per_round: 2
comm_round: 50
comm_round: 5  # 5 rounds for a quick sanity check; change this to a reasonable value for real training
epochs: 1
batch_size: 10
client_optimizer: sgd
Expand Down Expand Up @@ -54,7 +54,7 @@ tracking_args:
# example:
dp_args:
enable_dp: true
dp_type: cdp # cdp or ldp
dp_solution_type: cdp
epsilon: 0.5
delta: 0.1
sensitivity: 1
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# GPU mapping definitions.
#
# Each top-level key names one mapping. Inside a mapping, every key is a host
# name and its value lists how many processes to place on each GPU of that
# host, in GPU-index order:
#
# config_cluster0:
#   host_name_node0: [num_of_processes_on_GPU0, num_of_processes_on_GPU1, ..., num_of_processes_on_GPU_n]
#   host_name_node1: [num_of_processes_on_GPU0, num_of_processes_on_GPU1, ..., num_of_processes_on_GPU_n]
#   host_name_node_m: [num_of_processes_on_GPU0, num_of_processes_on_GPU1, ..., num_of_processes_on_GPU_n]


# 10 clients + 1 server (11 processes) on one machine with 4 GPUs.
mapping_default:
  ChaoyangHe-GPU-RTX2080Tix4: [3, 3, 3, 2]

# 4 clients + 1 server (5 processes) on one machine with 4 GPUs.
mapping_config1_5:
  host1: [2, 1, 1, 1]

# 10 clients + 1 server (11 processes) on one machine with 4 GPUs.
mapping_config2_11:
  host1: [3, 3, 3, 2]

# 10 clients + 1 server (11 processes) on one machine with 8 GPUs.
mapping_config3_11:
  host1: [2, 2, 2, 1, 1, 1, 1, 1]

# 4 clients + 1 server (5 processes) on one machine with 8 GPUs, leaving
# some GPU device IDs unused (a 0 entry places no process on that GPU).
mapping_config4_5:
  host1: [1, 0, 0, 1, 1, 0, 1, 1]

# 4 clients + 1 server (5 processes), one per machine, each placed on the
# second GPU of a 2-GPU machine.
# NOTE(review): the mapping name and the earlier comment mention 6 machines,
# but only five hosts are listed here -- confirm which is intended.
mapping_config5_6:
  host1: [0, 1]
  host2: [0, 1]
  host3: [0, 1]
  host4: [0, 1]
  host5: [0, 1]

# 4 clients + 1 server (5 processes) on 2 machines with 2 GPUs each.
# NOTE(review): the earlier comment said only the second GPU is used, but
# both GPUs receive processes in this mapping -- confirm.
mapping_config5_2:
  gpu-worker2: [1, 1]
  gpu-worker1: [2, 1]

# 10 clients + 1 server (11 processes) on 4 machines with 2 GPUs each.
# NOTE(review): as above, both GPUs of every machine receive processes.
mapping_config5_4:
  gpu-worker2: [1, 1]
  gpu-worker1: [2, 1]
  gpu-worker3: [3, 1]
  gpu-worker4: [1, 1]

# GPU mapping for the gRPC backend.
mapping_FedML_gRPC:
  hostname_node_server: [1]
  hostname_node_1: [1, 0, 0, 0]
  hostname_node_2: [1, 0, 0, 0]

# GPU mapping for the torch RPC backend.
mapping_FedML_tRPC:
  lambda-server1: [0, 0, 0, 0, 2, 2, 1, 1]
  lambda-server2: [2, 1, 1, 1, 0, 0, 0, 0]

# Alternative single-host torch RPC mapping (disabled):
# mapping_FedML_tRPC:
#   lambda-server1: [0, 0, 0, 0, 3, 3, 3, 2]
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ common_args:
scenario: "horizontal"
using_mlops: false
random_seed: 0
config_version: release

environment_args:
bootstrap: config/bootstrap.sh
Expand All @@ -25,15 +24,15 @@ train_args:
client_id_list:
client_num_in_total: 1000
client_num_per_round: 2
comm_round: 10
comm_round: 5  # 5 rounds for a quick sanity check; change this to a reasonable value for real training
epochs: 1
batch_size: 10
client_optimizer: sgd
learning_rate: 0.03
weight_decay: 0.001

validation_args:
frequency_of_the_test: 1
frequency_of_the_test: 5

device_args:
using_gpu: false
Expand All @@ -42,9 +41,8 @@ device_args:

comm_args:
backend: "MQTT_S3"
mqtt_config_path:
s3_config_path:
grpc_ipconfig_path: ./config/grpc_ipconfig.csv
mqtt_config_path: config/mqtt_config.yaml
s3_config_path: config/s3_config.yaml

tracking_args:
# the default log path is at ~/fedml-client/fedml/logs/ and ~/fedml-server/fedml/logs/
Expand All @@ -53,11 +51,10 @@ tracking_args:
wandb_project: fedml
wandb_name: fedml_torch_fedavg_mnist_lr


# example:
dp_args:
enable_dp: true
dp_type: ldp # cdp or ldp
dp_solution_type: ldp
epsilon: 0.5
delta: 0.1
sensitivity: 1
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env bash
RANK=$1
python3 torch_client.py --cf config/fedml_config.yaml --rank $RANK --role client
RUN_ID=$2
python3 torch_client.py --cf config/fedml_config.yaml --rank $RANK --role client --run_id $RUN_ID
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#!/usr/bin/env bash

python3 torch_server.py --cf config/fedml_config.yaml --rank 0 --role server
RUN_ID=$1
python3 torch_server.py --cf config/fedml_config.yaml --rank 0 --role server --run_id $RUN_ID
Empty file.

This file was deleted.

This file was deleted.

This file was deleted.

Loading

0 comments on commit d5221b9

Please sign in to comment.