From 65703c5b8184bcdf345d4db541c6ac46b6a49c71 Mon Sep 17 00:00:00 2001
From: alexliang
Date: Tue, 1 Nov 2022 02:24:16 +0800
Subject: [PATCH 1/3] update fedml docker build files and add diagnosis cli.

---
 devops/scripts/build-fedml-docker.sh          | 148 +++++++++---------
 devops/scripts/push-fedml-docker.sh           |   4 +-
 doc/en/starter/install/jetson.md              |   4 +-
 doc/en/starter/install/rpi.md                 |   4 +-
 docker/arm64v8/Dockerfile                     |   6 +-
 docker/build-docker.sh                        |  15 +-
 docker/x86-64/Dockerfile                      |   6 +-
 python/fedml/cli/cli.py                       |  42 +++++
 .../cli/edge_deployment/client_diagnosis.py   |  88 +++++++++++
 .../communication/s3/remote_storage.py        |  29 ++++
 10 files changed, 257 insertions(+), 89 deletions(-)
 create mode 100644 python/fedml/cli/edge_deployment/client_diagnosis.py

diff --git a/devops/scripts/build-fedml-docker.sh b/devops/scripts/build-fedml-docker.sh
index 3096a0fe31..46ea9ae738 100755
--- a/devops/scripts/build-fedml-docker.sh
+++ b/devops/scripts/build-fedml-docker.sh
@@ -6,110 +6,112 @@ pwd=`pwd`
 export FEDML_VERSION=`cat python/setup.py |grep version= |awk -F'=' '{print $2}' |awk -F',' '{print $1}'|awk -F'"' '{print $2}'`
 
 # Build X86_64 docker
-ARCH=x86_64
-OS=ubuntu18.04
-DISTRO=ubuntu1804
-PYTHON_VERSION=3.7
-PYTORCH_VERSION=1.12.1
-NCCL_VERSION=2.9.9
-CUDA_VERSION=11.3
-OUTPUT_IMAGE=fedml/fedml:latest-torch1.12.1-cuda11.3-cudnn8-devel
-NVIDIA_BASE_IMAGE=nvidia/cuda:11.3.1-cudnn8-devel-ubuntu18.04
-PYTORCH_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu113
-PYTORCH_GEOMETRIC_URL=https://data.pyg.org/whl/torch-1.12.0+cu113.html
-CURRENT_IMAGE=fedml/fedml:${FEDML_VERSION}-torch1.12.1-cuda11.3-cudnn8-devel
+ARCH="x86_64"
+OS="ubuntu18.04"
+DISTRO="ubuntu1804"
+PYTHON_VERSION="3.7"
+PYTORCH_VERSION="1.12.1"
+NCCL_VERSION="2.9.9"
+CUDA_VERSION="11.3"
+LIB_NCCL="2.9.9-1+cuda11.3"
+OUTPUT_IMAGE="fedml/fedml:latest-torch1.12.1-cuda11.3-cudnn8-devel"
+NVIDIA_BASE_IMAGE="nvidia/cuda:11.3.1-cudnn8-devel-ubuntu18.04"
+PYTORCH_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cu113"
+PYTORCH_GEOMETRIC_URL="https://data.pyg.org/whl/torch-1.12.0+cu113.html"
+CURRENT_IMAGE="fedml/fedml:${FEDML_VERSION}-torch1.12.1-cuda11.3-cudnn8-devel"
 
 cd ./docker
 bash build-docker.sh $ARCH $OS $DISTRO $PYTHON_VERSION $PYTORCH_VERSION $NCCL_VERSION $CUDA_VERSION \
-     $OUTPUT_IMAGE $NVIDIA_BASE_IMAGE $PYTORCH_EXTRA_INDEX_URL $PYTORCH_GEOMETRIC_URL
+     $OUTPUT_IMAGE $NVIDIA_BASE_IMAGE $PYTORCH_EXTRA_INDEX_URL $PYTORCH_GEOMETRIC_URL $LIB_NCCL
 
 docker tag $OUTPUT_IMAGE $CURRENT_IMAGE
 
 cd $pwd
 
 # Build ARM_64 docker
-ARCH=arm64
-OS=ubuntu20.04
-DISTRO=ubuntu2004
-PYTHON_VERSION=3.8
-PYTORCH_VERSION=1.12.1
-NCCL_VERSION=2.9.6
-CUDA_VERSION=11.3
-OUTPUT_IMAGE=fedml/fedml:latest-torch1.12.1-cuda11.3-cudnn8-devel-arm64
-NVIDIA_BASE_IMAGE=nvidia/cuda:11.3.0-cudnn8-devel-ubuntu20.04@sha256:8e3df8601e81c57e85c082e9bcc6c547641635730ef8516b2cfa9c9e6c1208af
-PYTORCH_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu113
-PYTORCH_GEOMETRIC_URL=https://data.pyg.org/whl/torch-1.12.0+cu113.html
-CURRENT_IMAGE=fedml/fedml:${FEDML_VERSION}-torch1.12.1-cuda11.3-cudnn8-devel-arm64
+ARCH="arm64"
+OS="ubuntu20.04"
+DISTRO="ubuntu2004"
+PYTHON_VERSION="3.8"
+PYTORCH_VERSION="1.12.1"
+NCCL_VERSION="2.9.6"
+CUDA_VERSION="11.3"
+LIB_NCCL="2.9.6-1+cuda11.3"
+OUTPUT_IMAGE="fedml/fedml:latest-torch1.12.1-cuda11.3-cudnn8-devel-arm64"
+NVIDIA_BASE_IMAGE="nvidia/cuda:11.3.0-cudnn8-devel-ubuntu20.04@sha256:8e3df8601e81c57e85c082e9bcc6c547641635730ef8516b2cfa9c9e6c1208af"
+PYTORCH_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cu113"
+PYTORCH_GEOMETRIC_URL="https://data.pyg.org/whl/torch-1.12.0+cu113.html"
+CURRENT_IMAGE="fedml/fedml:${FEDML_VERSION}-torch1.12.1-cuda11.3-cudnn8-devel-arm64"
+cd ./docker
 
 bash build-docker.sh $ARCH $OS $DISTRO $PYTHON_VERSION $PYTORCH_VERSION $NCCL_VERSION $CUDA_VERSION \
-     $OUTPUT_IMAGE $NVIDIA_BASE_IMAGE $PYTORCH_EXTRA_INDEX_URL $PYTORCH_GEOMETRIC_URL
+     $OUTPUT_IMAGE $NVIDIA_BASE_IMAGE $PYTORCH_EXTRA_INDEX_URL $PYTORCH_GEOMETRIC_URL $LIB_NCCL
 
-cd ./docker
 docker tag $OUTPUT_IMAGE $CURRENT_IMAGE
 
 cd $pwd
 
 # Build nvidia_jetson docker
-ARCH=jetson
-OS=ubuntu20.04
-DISTRO=ubuntu2004
-PYTHON_VERSION=3.7
-PYTORCH_VERSION=1.12.1
-NCCL_VERSION=2.9.6
-CUDA_VERSION=11.3
-OUTPUT_IMAGE=fedml/fedml:latest-nvidia-jetson-l4t-ml-r32.6.1-py3
-NVIDIA_BASE_IMAGE=nvidia/cuda:11.3.0-cudnn8-devel-ubuntu20.04@sha256:8e3df8601e81c57e85c082e9bcc6c547641635730ef8516b2cfa9c9e6c1208af
-PYTORCH_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu113
-PYTORCH_GEOMETRIC_URL=https://data.pyg.org/whl/torch-1.12.0+cu113.html
-CURRENT_IMAGE=fedml/fedml:${FEDML_VERSION}-nvidia-jetson-l4t-ml-r32.6.1-py3
+ARCH="jetson"
+OS="ubuntu20.04"
+DISTRO="ubuntu2004"
+PYTHON_VERSION="3.7"
+PYTORCH_VERSION="1.12.1"
+NCCL_VERSION="2.9.6"
+CUDA_VERSION="11.3"
+OUTPUT_IMAGE="fedml/fedml:latest-nvidia-jetson-l4t-ml-r32.6.1-py3"
+NVIDIA_BASE_IMAGE="nvidia/cuda:11.3.0-cudnn8-devel-ubuntu20.04@sha256:8e3df8601e81c57e85c082e9bcc6c547641635730ef8516b2cfa9c9e6c1208af"
+PYTORCH_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cu113"
+PYTORCH_GEOMETRIC_URL="https://data.pyg.org/whl/torch-1.12.0+cu113.html"
+CURRENT_IMAGE="fedml/fedml:${FEDML_VERSION}-nvidia-jetson-l4t-ml-r32.6.1-py3"
 
 cd ./docker
 bash build-docker.sh $ARCH $OS $DISTRO $PYTHON_VERSION $PYTORCH_VERSION $NCCL_VERSION $CUDA_VERSION \
-     $OUTPUT_IMAGE $NVIDIA_BASE_IMAGE $PYTORCH_EXTRA_INDEX_URL $PYTORCH_GEOMETRIC_URL
+     $OUTPUT_IMAGE $NVIDIA_BASE_IMAGE $PYTORCH_EXTRA_INDEX_URL $PYTORCH_GEOMETRIC_URL $LIB_NCCL
 
 docker tag $OUTPUT_IMAGE $CURRENT_IMAGE
 
 cd $pwd
 
 # Build rpi32 docker
-ARCH=rpi32
-OS=ubuntu20.04
-DISTRO=ubuntu2004
-PYTHON_VERSION=3.7
-PYTORCH_VERSION=1.12.1
-NCCL_VERSION=2.9.6
-CUDA_VERSION=11.3
-OUTPUT_IMAGE=fedml/fedml:latest-raspberrypi4-32-py37
-NVIDIA_BASE_IMAGE=nvidia/cuda:11.3.0-cudnn8-devel-ubuntu20.04@sha256:8e3df8601e81c57e85c082e9bcc6c547641635730ef8516b2cfa9c9e6c1208af
-PYTORCH_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu113
-PYTORCH_GEOMETRIC_URL=https://data.pyg.org/whl/torch-1.12.0+cu113.html
-CURRENT_IMAGE=fedml/fedml:${FEDML_VERSION}-raspberrypi4-32-py37
-
-cd ./docker
-bash build-docker.sh $ARCH $OS $DISTRO $PYTHON_VERSION $PYTORCH_VERSION $NCCL_VERSION $CUDA_VERSION \
-     $OUTPUT_IMAGE $NVIDIA_BASE_IMAGE $PYTORCH_EXTRA_INDEX_URL $PYTORCH_GEOMETRIC_URL
-
-docker tag $OUTPUT_IMAGE $CURRENT_IMAGE
-
-cd $pwd
+#ARCH="rpi32"
+#OS="ubuntu20.04"
+#DISTRO="ubuntu2004"
+#PYTHON_VERSION="3.7"
+#PYTORCH_VERSION="1.12.1"
+#NCCL_VERSION="2.9.6"
+#CUDA_VERSION="11.3"
+#OUTPUT_IMAGE="fedml/fedml:latest-raspberrypi4-32-py37"
+#NVIDIA_BASE_IMAGE="nvidia/cuda:11.3.0-cudnn8-devel-ubuntu20.04@sha256:8e3df8601e81c57e85c082e9bcc6c547641635730ef8516b2cfa9c9e6c1208af"
+#PYTORCH_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cu113"
+#PYTORCH_GEOMETRIC_URL="https://data.pyg.org/whl/torch-1.12.0+cu113.html"
+#CURRENT_IMAGE="fedml/fedml:${FEDML_VERSION}-raspberrypi4-32-py37"
+#
+#cd ./docker
+#bash build-docker.sh $ARCH $OS $DISTRO $PYTHON_VERSION $PYTORCH_VERSION $NCCL_VERSION $CUDA_VERSION \
+#     $OUTPUT_IMAGE $NVIDIA_BASE_IMAGE $PYTORCH_EXTRA_INDEX_URL $PYTORCH_GEOMETRIC_URL $LIB_NCCL
+#
+#docker tag $OUTPUT_IMAGE $CURRENT_IMAGE
+#
+#cd $pwd
 
 # Build rpi64 docker
-ARCH=rpi64
-OS=ubuntu20.04
-DISTRO=ubuntu2004
-PYTHON_VERSION=3.7
-PYTORCH_VERSION=1.12.1
-NCCL_VERSION=2.9.6
-CUDA_VERSION=11.3
-OUTPUT_IMAGE=fedml/fedml:latest-raspberrypi4-64-py37
-NVIDIA_BASE_IMAGE=nvidia/cuda:11.3.0-cudnn8-devel-ubuntu20.04@sha256:8e3df8601e81c57e85c082e9bcc6c547641635730ef8516b2cfa9c9e6c1208af
-PYTORCH_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu113
-PYTORCH_GEOMETRIC_URL=https://data.pyg.org/whl/torch-1.12.0+cu113.html
-CURRENT_IMAGE=fedml/fedml:${FEDML_VERSION}-raspberrypi4-64-py37
+ARCH="rpi64"
+OS="ubuntu20.04"
+DISTRO="ubuntu2004"
+PYTHON_VERSION="3.7"
+PYTORCH_VERSION="1.12.1"
+NCCL_VERSION="2.9.6"
+CUDA_VERSION="11.3"
+OUTPUT_IMAGE="fedml/fedml:latest-raspberrypi4-64-py37"
+NVIDIA_BASE_IMAGE="nvidia/cuda:11.3.0-cudnn8-devel-ubuntu20.04@sha256:8e3df8601e81c57e85c082e9bcc6c547641635730ef8516b2cfa9c9e6c1208af"
+PYTORCH_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cu113"
+PYTORCH_GEOMETRIC_URL="https://data.pyg.org/whl/torch-1.12.0+cu113.html"
+CURRENT_IMAGE="fedml/fedml:${FEDML_VERSION}-raspberrypi4-64-py37"
 
 cd ./docker
 bash build-docker.sh $ARCH $OS $DISTRO $PYTHON_VERSION $PYTORCH_VERSION $NCCL_VERSION $CUDA_VERSION \
-     $OUTPUT_IMAGE $NVIDIA_BASE_IMAGE $PYTORCH_EXTRA_INDEX_URL $PYTORCH_GEOMETRIC_URL
+     $OUTPUT_IMAGE $NVIDIA_BASE_IMAGE $PYTORCH_EXTRA_INDEX_URL $PYTORCH_GEOMETRIC_URL $LIB_NCCL
 
 docker tag $OUTPUT_IMAGE $CURRENT_IMAGE
diff --git a/devops/scripts/push-fedml-docker.sh b/devops/scripts/push-fedml-docker.sh
index fb1cdb2550..32442da0a5 100755
--- a/devops/scripts/push-fedml-docker.sh
+++ b/devops/scripts/push-fedml-docker.sh
@@ -13,8 +13,8 @@ if [[ $push_arm_arch_images != "" ]]; then
   docker push fedml/fedml:latest-nvidia-jetson-l4t-ml-r32.6.1-py3
   docker push fedml/fedml:${FEDML_VERSION}-nvidia-jetson-l4t-ml-r32.6.1-py3
 
-  docker push fedml/fedml:latest-raspberrypi4-32-py37
-  docker push fedml/fedml:${FEDML_VERSION}-raspberrypi4-32-py37
+#  docker push fedml/fedml:latest-raspberrypi4-32-py37
+#  docker push fedml/fedml:${FEDML_VERSION}-raspberrypi4-32-py37
 
   docker push fedml/fedml:latest-raspberrypi4-64-py37
   docker push fedml/fedml:${FEDML_VERSION}-raspberrypi4-64-py37
diff --git a/doc/en/starter/install/jetson.md b/doc/en/starter/install/jetson.md
index a5efb9afd4..c6faba4f04 100644
--- a/doc/en/starter/install/jetson.md
+++ b/doc/en/starter/install/jetson.md
@@ -3,12 +3,12 @@
 ## Run FedML with Docker (Recommended)
 - Pull FedML RPI docker image
 ```
-docker pull fedml/fedml:nvidia-jetson-l4t-ml-r32.6.1-py3
+docker pull fedml/fedml:latest-nvidia-jetson-l4t-ml-r32.6.1-py3
 ```
 
 - Run Docker with "fedml login"
 ```
-docker run -t -i --runtime nvidia fedml/fedml:nvidia-jetson-l4t-ml-r32.6.1-py3 /bin/bash
+docker run -t -i --runtime nvidia fedml/fedml:latest-nvidia-jetson-l4t-ml-r32.6.1-py3 /bin/bash
 
 root@8bc0de2ce0e0:/usr/src/app# fedml login 299
diff --git a/doc/en/starter/install/rpi.md b/doc/en/starter/install/rpi.md
index e569ee6e77..a69fb2a9aa 100644
--- a/doc/en/starter/install/rpi.md
+++ b/doc/en/starter/install/rpi.md
@@ -3,12 +3,12 @@
 ## Run FedML with Docker (Recommended)
 - Pull FedML RPI docker image
 ```
-docker pull fedml/fedml:raspberrypi4-64-py37
+docker pull fedml/fedml:latest-raspberrypi4-64-py37
 ```
 
 - Run Docker with "fedml login"
 ```
-docker run -t -i fedml/fedml:raspberrypi4-64-py37 /bin/bash
+docker run -t -i fedml/fedml:latest-raspberrypi4-64-py37 /bin/bash
 
 root@8bc0de2ce0e0:/usr/src/app# fedml login 299
diff --git a/docker/arm64v8/Dockerfile b/docker/arm64v8/Dockerfile
index 25e598d4fa..cda2b483e2 100644
--- a/docker/arm64v8/Dockerfile
+++ b/docker/arm64v8/Dockerfile
@@ -30,6 +30,8 @@ ARG PYTORCH_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu113
 
 ARG PYTORCH_GEOMETRIC_URL=https://data.pyg.org/whl/torch-1.12.0+cu113.html
 
+ARG LIB_NCCL=2.9.6-1+cuda11.3
+
 RUN echo ${NCCL_VERSION}
 RUN echo ${CUDA_VERSION}
 
@@ -125,9 +127,7 @@ RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/
 add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/${DISTRO}/${ARCH} /" && \
 apt update && \
 #export NCCL_VERSION_ENV=`echo $NCCL_VERSION | awk -F'-1' '{print $1}'` && \
-export NCCL_VERSION_ENV=$NCCL_VERSION-1 && \
-export CUDA_VERSION_ENV=`echo $CUDA_VERSION | sed 's/\.1//g'` && \
-apt install -y --allow-change-held-packages libnccl2=${NCCL_VERSION_ENV}+cuda${CUDA_VERSION_ENV} libnccl-dev=${NCCL_VERSION_ENV}+cuda${CUDA_VERSION_ENV}
+apt install -y --allow-change-held-packages libnccl2=${LIB_NCCL} libnccl-dev=${LIB_NCCL}
 
 # ***************************************************************************
 # PyTorch (install from source)
diff --git a/docker/build-docker.sh b/docker/build-docker.sh
index 165c4f38c2..0130ccf0e9 100644
--- a/docker/build-docker.sh
+++ b/docker/build-docker.sh
@@ -9,22 +9,27 @@ NCCL_VERSION=$6
 CUDA_VERSION=$7
 OUTPUT_IMAGE=$8
 NVIDIA_BASE_IMAGE=""
-if [ $# -gt 9 ]; then
+if [ $# -ge 9 ]; then
   NVIDIA_BASE_IMAGE=$9
 fi
 
-if [ $# -gt 10 ]; then
+if [ $# -ge 10 ]; then
   PYTORCH_EXTRA_INDEX_URL=${10}
 else
   PYTORCH_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu113
 fi
 
-if [ $# -gt 11 ]; then
+if [ $# -ge 11 ]; then
   PYTORCH_GEOMETRIC_URL=${11}
 else
   PYTORCH_GEOMETRIC_URL=https://data.pyg.org/whl/torch-1.12.0+cu113.html
 fi
 
+if [ $# -ge 12 ]; then
+  LIB_NCCL=${12}
+else
+  LIB_NCCL="null"
+fi
+
 DOCKER_FILE_PATH=""
 if [[ "$ARCH" == "x86_64" ]]; then
@@ -36,7 +41,7 @@ elif [[ "$ARCH" == "jetson" ]]; then
 elif [[ "$ARCH" == "rpi32" ]]; then
   DOCKER_FILE_PATH=./rpi/Dockerfile_32bit_armv7
 elif [[ "$ARCH" == "rpi64" ]]; then
-  DOCKER_FILE_PATH=./rpi/Dockerfile_32bit_armv8
+  DOCKER_FILE_PATH=./rpi/Dockerfile_64bit_armv8
 fi
 
 if [ $DOCKER_FILE_PATH == "" ]; then
@@ -55,6 +60,7 @@ if [[ $NVIDIA_BASE_IMAGE != "" ]]; then
     --build-arg NVIDIA_BASE_IMAGE=$NVIDIA_BASE_IMAGE \
     --build-arg PYTORCH_EXTRA_INDEX_URL=$PYTORCH_EXTRA_INDEX_URL \
     --build-arg PYTORCH_GEOMETRIC_URL=$PYTORCH_GEOMETRIC_URL \
+    --build-arg LIB_NCCL=$LIB_NCCL \
     --network=host \
     -t $OUTPUT_IMAGE .
 else
@@ -67,6 +73,7 @@ else
     --build-arg CUDA_VERSION=$CUDA_VERSION \
     --build-arg PYTORCH_EXTRA_INDEX_URL=$PYTORCH_EXTRA_INDEX_URL \
     --build-arg PYTORCH_GEOMETRIC_URL=$PYTORCH_GEOMETRIC_URL \
+    --build-arg LIB_NCCL=$LIB_NCCL \
     --network=host \
     -t $OUTPUT_IMAGE .
 fi
diff --git a/docker/x86-64/Dockerfile b/docker/x86-64/Dockerfile
index aa4f6ecdfc..dc446fa414 100644
--- a/docker/x86-64/Dockerfile
+++ b/docker/x86-64/Dockerfile
@@ -29,6 +29,8 @@ ARG PYTORCH_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu113
 
 ARG PYTORCH_GEOMETRIC_URL=https://data.pyg.org/whl/torch-1.12.0+cu113.html
 
+ARG LIB_NCCL="2.9.9-1+cuda11.3"
+
 RUN echo ${NCCL_VERSION}
 RUN echo ${CUDA_VERSION}
 
@@ -124,9 +126,7 @@ RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/
 add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/${DISTRO}/${ARCH} /" && \
 apt update && \
 #export NCCL_VERSION_ENV=`echo $NCCL_VERSION | awk -F'-1' '{print $1}'` && \
-export NCCL_VERSION_ENV=$NCCL_VERSION && \
-export CUDA_VERSION_ENV=`echo $CUDA_VERSION | sed 's/\.1//g'` && \
-apt install -y --allow-change-held-packages libnccl2=${NCCL_VERSION_ENV}+cuda${CUDA_VERSION_ENV} libnccl-dev=${NCCL_VERSION_ENV}+cuda${CUDA_VERSION_ENV}
+apt install -y --allow-change-held-packages libnccl2=${LIB_NCCL} libnccl-dev=${LIB_NCCL}
 
 # ***************************************************************************
 # PyTorch (install from source)
diff --git a/python/fedml/cli/cli.py b/python/fedml/cli/cli.py
index 1b4b315302..f2e8ebb99b 100755
--- a/python/fedml/cli/cli.py
+++ b/python/fedml/cli/cli.py
@@ -18,6 +18,7 @@ from ..cli.server_deployment.docker_login import login_with_server_docker_mode
 from ..cli.server_deployment.docker_login import logout_with_server_docker_mode
 from ..cli.server_deployment.docker_login import logs_with_server_docker_mode
+from ..cli.edge_deployment.client_diagnosis import ClientDiagnosis
 from ..cli.comm_utils import sys_utils
 
@@ -552,6 +553,47 @@ def build_mlops_package(
     return 0
 
 
+@cli.command("diagnosis", help="Diagnosis for open.fedml.ai, AWS S3 service and MQTT service")
+@click.option(
+    "--open", "-o", default=None, is_flag=True, help="check the connection to open.fedml.ai.",
+)
+@click.option(
+    "--s3", "-s", default=None, is_flag=True, help="check the connection to AWS S3 server.",
+)
+@click.option(
+    "--mqtt", "-m", default=None, is_flag=True, help="check the connection to mqtt.fedml.ai (1883).",
+)
+def mlops_diagnosis(open, s3, mqtt):
+    check_open = open
+    check_s3 = s3
+    check_mqtt = mqtt
+    if open is None and s3 is None and mqtt is None:
+        check_open = True
+        check_s3 = True
+        check_mqtt = True
+
+    if check_open:
+        is_open_connected = ClientDiagnosis.check_open_connection()
+        if is_open_connected:
+            click.echo("The connection to https://open.fedml.ai is OK.")
+        else:
+            click.echo("You cannot connect to https://open.fedml.ai.")
+
+    if check_s3:
+        is_s3_connected = ClientDiagnosis.check_s3_connection()
+        if is_s3_connected:
+            click.echo("The connection to AWS S3 is OK.")
+        else:
+            click.echo("You cannot connect to AWS S3.")
+
+    if check_mqtt:
+        is_mqtt_connected = ClientDiagnosis.check_mqtt_connection()
+        if is_mqtt_connected:
+            click.echo("The connection to mqtt.fedml.ai (port:1883) is OK.")
+        else:
+            click.echo("You cannot connect to mqtt.fedml.ai (port:1883).")
+
+
 @cli.command(
     "env",
     help="collect the environment information to help debugging, including OS, Hardware Architecture, "
diff --git a/python/fedml/cli/edge_deployment/client_diagnosis.py b/python/fedml/cli/edge_deployment/client_diagnosis.py
new file mode 100644
index 0000000000..f7f76f0370
--- /dev/null
+++ b/python/fedml/cli/edge_deployment/client_diagnosis.py
@@ -0,0 +1,88 @@
+import time
+
+from ...core.mlops.mlops_configs import MLOpsConfigs
+from ...core.distributed.communication.s3.remote_storage import S3Storage
+from ...core.distributed.communication.mqtt.mqtt_manager import MqttManager
+
+
+class Singleton(object):
+    def __new__(cls):
+        if not hasattr(cls, "_instance"):
+            orig = super(Singleton, cls)
+            cls._instance = orig.__new__(cls)
+        return cls._instance
+
+
+class ClientDiagnosis(Singleton):
+    def __init__(self):
+        self.is_mqtt_connected = False
+
+    @staticmethod
+    def check_open_connection():
+        args = {"config_version": "release"}
+        try:
+            mqtt_config, s3_config = MLOpsConfigs.get_instance(args).fetch_configs()
+        except Exception as e:
+            return False
+
+        return True
+
+    @staticmethod
+    def check_s3_connection():
+        args = {"config_version": "release"}
+        try:
+            mqtt_config, s3_config = MLOpsConfigs.get_instance(args).fetch_configs()
+            s3_storage = S3Storage(s3_config)
+            download_ret = s3_storage.test_s3_base_cmds("d31df596c32943c64015a7e2d6e0d5a4", "test-base-cmds")
+            if download_ret:
+                return True
+        except Exception as e:
+            return False
+
+        return False
+
+    @staticmethod
+    def check_mqtt_connection():
+        args = {"config_version": "release"}
+        try:
+            mqtt_config, s3_config = MLOpsConfigs.get_instance(args).fetch_configs()
+            mqtt_mgr = MqttManager(
+                mqtt_config["BROKER_HOST"],
+                mqtt_config["BROKER_PORT"],
+                mqtt_config["MQTT_USER"],
+                mqtt_config["MQTT_PWD"],
+                mqtt_config["MQTT_KEEPALIVE"],
+                "fedml-diagnosis-id"
+            )
+            diagnosis = ClientDiagnosis()
+            diagnosis.is_mqtt_connected = False
+            mqtt_mgr.add_connected_listener(diagnosis.on_mqtt_connected)
+            mqtt_mgr.add_disconnected_listener(diagnosis.on_mqtt_disconnected)
+            mqtt_mgr.connect()
+            mqtt_mgr.loop_start()
+
+            count = 0
+            while not diagnosis.is_mqtt_connected:
+                count += 1
+                if count >= 15:
+                    return False
+                time.sleep(1)
+
+            return True
+        except Exception as e:
+            print("MQTT connect exception: {}".format(str(e)))
+            return False
+
+        return False
+
+    def on_mqtt_connected(self, mqtt_client_object):
+        self.is_mqtt_connected = True
+        pass
+
+    def on_mqtt_disconnected(self, mqtt_client_object):
+        self.is_mqtt_connected = False
+
+
+if __name__ == "__main__":
+    pass
+
diff --git a/python/fedml/core/distributed/communication/s3/remote_storage.py b/python/fedml/core/distributed/communication/s3/remote_storage.py
index 67920a46e0..96b94f3d01 100644
--- a/python/fedml/core/distributed/communication/s3/remote_storage.py
+++ b/python/fedml/core/distributed/communication/s3/remote_storage.py
@@ -155,6 +155,35 @@ def download_file(self, path_s3, path_local):
         if retry >= 3:
             logging.error(f"Download zip failed after max retry.")
 
+    def test_s3_base_cmds(self, message_key, message_body):
+        """
+        test_s3_base_cmds
+        :param message_key: s3 message key
+        :param message_body: s3 message body
+        :return:
+        """
+        retry = 0
+        while retry < 3:
+            try:
+                global aws_s3_client
+                message_pkl = pickle.dumps(message_body)
+                aws_s3_client.put_object(
+                    Body=message_pkl, Bucket=self.bucket_name, Key=message_key, ACL="public-read",
+                )
+                obj = aws_s3_client.get_object(Bucket=self.bucket_name, Key=message_key)
+                message_pkl_downloaded = obj["Body"].read()
+                message_downloaded = pickle.loads(message_pkl_downloaded)
+                if str(message_body) == str(message_downloaded):
+                    break
+                retry += 1
+            except Exception as e:
+                raise Exception("S3 base commands test failed at retry count {}, exception: {}".format(str(retry), str(e)))
+                retry += 1
+        if retry >= 3:
+            raise Exception(f"S3 base commands test failed after max retry.")
+
+        return True
+
     def delete_s3_zip(self, path_s3):
         """
         delete s3 object
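With this patch, build-docker.sh takes the NCCL package pin as a twelfth positional argument. The sketch below shows a standalone invocation under that assumption, reusing the x86_64 values from build-fedml-docker.sh above; the argument order follows the $1-$12 parsing in build-docker.sh, and the script is assumed to be run from the repository's docker/ directory.

```
# Sketch: build the x86_64 image by hand, passing the new LIB_NCCL pin as the last argument.
cd ./docker
bash build-docker.sh \
    x86_64 ubuntu18.04 ubuntu1804 3.7 1.12.1 2.9.9 11.3 \
    fedml/fedml:latest-torch1.12.1-cuda11.3-cudnn8-devel \
    nvidia/cuda:11.3.1-cudnn8-devel-ubuntu18.04 \
    https://download.pytorch.org/whl/cu113 \
    https://data.pyg.org/whl/torch-1.12.0+cu113.html \
    "2.9.9-1+cuda11.3"
```

If fewer than twelve arguments are given, the script falls back to LIB_NCCL="null", so the Dockerfiles' default ARG values apply.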
From 60624b20b343eca864e74c8b9ed5d0de307a0e1a Mon Sep 17 00:00:00 2001
From: alexliang
Date: Tue, 1 Nov 2022 02:28:22 +0800
Subject: [PATCH 2/3] update diagnosis readme.

---
 doc/en/mlops/api.md        | 6 ++++++
 python/fedml/cli/README.md | 8 +++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/doc/en/mlops/api.md b/doc/en/mlops/api.md
index 57ce3827b9..261acf9e65 100644
--- a/doc/en/mlops/api.md
+++ b/doc/en/mlops/api.md
@@ -148,5 +148,11 @@ logs from edge server with docker mode:
 fedml logs --docker --docker-rank 1
 ```
 
+## 6. Diagnosis
+Diagnose the connection to https://open.fedml.ai, AWS S3, and MQTT (mqtt.fedml.ai:1883):
+```
+fedml diagnosis --open --s3 --mqtt
+```
+
 You can also refer to a sanity check test example here:
 [https://github.com/FedML-AI/FedML/blob/master/test/fedml_user_code/cli/build.sh](https://github.com/FedML-AI/FedML/blob/master/test/fedml_user_code/cli/build.sh)
\ No newline at end of file
diff --git a/python/fedml/cli/README.md b/python/fedml/cli/README.md
index ec49372473..a89cf53d98 100644
--- a/python/fedml/cli/README.md
+++ b/python/fedml/cli/README.md
@@ -102,4 +102,10 @@ fedml logs -s
 logs from edge server with docker mode:
 ```
 fedml logs --docker --docker-rank 1
-```
\ No newline at end of file
+```
+
+## 6. Diagnosis
+Diagnose the connection to https://open.fedml.ai, AWS S3, and MQTT (mqtt.fedml.ai:1883):
+```
+fedml diagnosis --open --s3 --mqtt
+```

From e5756cf7c86690d654d7002ace28760aa4052689 Mon Sep 17 00:00:00 2001
From: alexliang
Date: Tue, 1 Nov 2022 02:29:25 +0800
Subject: [PATCH 3/3] update version to 0.7.340.

---
 python/fedml/__init__.py | 2 +-
 python/setup.py          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/fedml/__init__.py b/python/fedml/__init__.py
index fae53ca2f5..0e89f8c6a4 100644
--- a/python/fedml/__init__.py
+++ b/python/fedml/__init__.py
@@ -23,7 +23,7 @@
 _global_training_type = None
 _global_comm_backend = None
 
-__version__ = "0.7.339"
+__version__ = "0.7.340"
 
 
 def init(args=None):
diff --git a/python/setup.py b/python/setup.py
index aac2159cd2..2eae2cf746 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -75,7 +75,7 @@ def finalize_options(self):
 
 setup(
     name="fedml",
-    version="0.7.339",
+    version="0.7.340",
     author="FedML Team",
     author_email="ch@fedml.ai",
     description="A research and production integrated edge-cloud library for "
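The diagnosis subcommand documented in PATCH 2/3 can also be run per service. A minimal usage sketch, assuming the fedml CLI from this patch series (0.7.340) is installed; the flag behavior follows mlops_diagnosis() in cli.py above, where omitting all flags checks every service.

```
# Check everything (no flags), or each service individually.
fedml diagnosis
fedml diagnosis --open --s3 --mqtt
fedml diagnosis -o   # only https://open.fedml.ai
fedml diagnosis -s   # only AWS S3
fedml diagnosis -m   # only mqtt.fedml.ai:1883
```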