diff --git a/mephisto/abstractions/architects/channels/websocket_channel.py b/mephisto/abstractions/architects/channels/websocket_channel.py
index ecf344422..0eac5ffbc 100644
--- a/mephisto/abstractions/architects/channels/websocket_channel.py
+++ b/mephisto/abstractions/architects/channels/websocket_channel.py
@@ -23,6 +23,8 @@
 
 logger = get_logger(name=__name__)
 
+MAX_RETRIES = 3
+
 
 class WebsocketChannel(Channel):
     """
@@ -55,6 +57,7 @@ def __init__(
         self._is_alive = False
         self._is_closed = False
         self._socket_task: Optional[asyncio.Task] = None
+        self._retries = MAX_RETRIES
 
     def is_closed(self):
         """
@@ -161,6 +164,20 @@ async def run_socket():
                         f"Unhandled OSError exception in socket {e}, attempting restart"
                     )
                     await asyncio.sleep(0.2)
+                except websockets.exceptions.InvalidStatusCode as e:
+                    # self._retries counts DOWN from MAX_RETRIES; once it reaches 0
+                    # we give up instead of retrying a bad handshake forever.
+                    if self._retries == 0:
+                        raise ConnectionRefusedError(
+                            "Could not connect after retries"
+                        ) from e
+                    curr_retry = MAX_RETRIES - self._retries
+                    logger.exception(
+                        f"Status code error {repr(e)}, attempting retry {curr_retry}",
+                        exc_info=True,
+                    )
+                    await asyncio.sleep(1 + curr_retry)
+                    self._retries -= 1
                 except Exception as e:
                     logger.exception(f"Unhandled exception in socket {e}, {repr(e)}")
                     if self._is_closed:
diff --git a/mephisto/abstractions/architects/ec2/run_scripts/node/init_server.sh b/mephisto/abstractions/architects/ec2/run_scripts/node/init_server.sh
index c1c75f220..ba6c7bc03 100644
--- a/mephisto/abstractions/architects/ec2/run_scripts/node/init_server.sh
+++ b/mephisto/abstractions/architects/ec2/run_scripts/node/init_server.sh
@@ -1,7 +1,8 @@
 #!/bin/bash
 
 echo "Installing basic requirements..."
-sudo yum update -y >> /home/ec2-user/routing_server/setup/setup_log.txt 2>&1
+# Following is commented out until the aws linux2 repo is no longer lagging
+# sudo yum update -y >> /home/ec2-user/routing_server/setup/setup_log.txt 2>&1
 sudo yum install -y httpd >> /home/ec2-user/routing_server/setup/setup_log.txt 2>&1
 
 echo "Downloading Node..."