From bc3526f3407e523234e446d7676fc10af1f4a641 Mon Sep 17 00:00:00 2001 From: Jack Urbanek Date: Fri, 12 Nov 2021 13:27:57 -0500 Subject: [PATCH] Creating the EC2Architect (#603) * Creating the EC2Architect * small fixes * Allowing multiple ssh ips * Cleaning up scripts * Formatting * Fallback server doesn't log liveliness checks * Shutdown and relaunch cleanups * Fallback server push cleanup * Forgot to register zone records --- .../abstractions/architects/ec2/.gitignore | 3 + .../abstractions/architects/ec2/__init__.py | 5 + .../architects/ec2/cleanup_ec2_resources.py | 22 + .../architects/ec2/ec2_architect.py | 351 ++++++ .../architects/ec2/ec2_helpers.py | 1086 +++++++++++++++++ .../architects/ec2/fallback_server/README.md | 7 + .../ec2/fallback_server/access_logs/README.md | 5 + .../architects/ec2/fallback_server/app.py | 64 + .../ec2/fallback_server/fallback.service | 12 + .../fallback_server/scripts/first_setup.sh | 15 + .../ec2/fallback_server/scripts/run_server.sh | 5 + .../fallback_server/templates/landing.html | 33 + .../architects/ec2/keypairs/README.md | 6 + .../architects/ec2/prepare_ec2_servers.py | 227 ++++ .../ec2/run_scripts/flask/init_server.sh | 17 + .../ec2/run_scripts/flask/router.service | 12 + .../ec2/run_scripts/flask/run_server.sh | 4 + .../ec2/run_scripts/node/init_server.sh | 24 + .../ec2/run_scripts/node/router.service | 12 + .../ec2/run_scripts/node/run_server.sh | 5 + .../architects/ec2/servers/README.md | 3 + .../static_html_task/static_html_blueprint.py | 1 + mephisto/operations/registry.py | 2 + mephisto/tools/scripts.py | 5 +- 24 files changed, 1925 insertions(+), 1 deletion(-) create mode 100644 mephisto/abstractions/architects/ec2/.gitignore create mode 100644 mephisto/abstractions/architects/ec2/__init__.py create mode 100644 mephisto/abstractions/architects/ec2/cleanup_ec2_resources.py create mode 100644 mephisto/abstractions/architects/ec2/ec2_architect.py create mode 100644 mephisto/abstractions/architects/ec2/ec2_helpers.py create mode 100644 mephisto/abstractions/architects/ec2/fallback_server/README.md create mode 100644 mephisto/abstractions/architects/ec2/fallback_server/access_logs/README.md create mode 100644 mephisto/abstractions/architects/ec2/fallback_server/app.py create mode 100644 mephisto/abstractions/architects/ec2/fallback_server/fallback.service create mode 100644 mephisto/abstractions/architects/ec2/fallback_server/scripts/first_setup.sh create mode 100644 mephisto/abstractions/architects/ec2/fallback_server/scripts/run_server.sh create mode 100644 mephisto/abstractions/architects/ec2/fallback_server/templates/landing.html create mode 100644 mephisto/abstractions/architects/ec2/keypairs/README.md create mode 100644 mephisto/abstractions/architects/ec2/prepare_ec2_servers.py create mode 100644 mephisto/abstractions/architects/ec2/run_scripts/flask/init_server.sh create mode 100644 mephisto/abstractions/architects/ec2/run_scripts/flask/router.service create mode 100644 mephisto/abstractions/architects/ec2/run_scripts/flask/run_server.sh create mode 100644 mephisto/abstractions/architects/ec2/run_scripts/node/init_server.sh create mode 100644 mephisto/abstractions/architects/ec2/run_scripts/node/router.service create mode 100644 mephisto/abstractions/architects/ec2/run_scripts/node/run_server.sh create mode 100644 mephisto/abstractions/architects/ec2/servers/README.md diff --git a/mephisto/abstractions/architects/ec2/.gitignore b/mephisto/abstractions/architects/ec2/.gitignore new file mode 100644 index 000000000..6a7520bc7 --- /dev/null +++ b/mephisto/abstractions/architects/ec2/.gitignore @@ -0,0 +1,3 @@ +keypairs/* +servers/* +server_details.json diff --git a/mephisto/abstractions/architects/ec2/__init__.py b/mephisto/abstractions/architects/ec2/__init__.py new file mode 100644 index 000000000..240697e32 --- /dev/null +++ b/mephisto/abstractions/architects/ec2/__init__.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. diff --git a/mephisto/abstractions/architects/ec2/cleanup_ec2_resources.py b/mephisto/abstractions/architects/ec2/cleanup_ec2_resources.py new file mode 100644 index 000000000..e4913ecf1 --- /dev/null +++ b/mephisto/abstractions/architects/ec2/cleanup_ec2_resources.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import mephisto.abstractions.architects.ec2.ec2_helpers as ec2_helpers +import boto3 # type: ignore +import os +import json + +from typing import Dict, Any + + +# TODO Hydrize +def main(): + iam_role_name = input("Please enter local profile name for IAM role\n>> ") + ec2_helpers.cleanup_fallback_server(iam_role_name) + + +if __name__ == "__main__": + main() diff --git a/mephisto/abstractions/architects/ec2/ec2_architect.py b/mephisto/abstractions/architects/ec2/ec2_architect.py new file mode 100644 index 000000000..0e5c695f9 --- /dev/null +++ b/mephisto/abstractions/architects/ec2/ec2_architect.py @@ -0,0 +1,351 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import os +import sh # type: ignore +import shutil +import time +import requests +import re +import json +import boto3 # type: ignore +from dataclasses import dataclass, field +from omegaconf import MISSING, DictConfig # type: ignore +from mephisto.abstractions.architect import Architect, ArchitectArgs +from mephisto.abstractions.architects.router.build_router import build_router +from mephisto.abstractions.architects.channels.websocket_channel import WebsocketChannel +from mephisto.operations.registry import register_mephisto_abstraction +from typing import List, Dict, Optional, TYPE_CHECKING, Callable + +import mephisto.abstractions.architects.ec2.ec2_helpers as ec2_helpers +from mephisto.abstractions.architects.ec2.ec2_helpers import ( + DEFAULT_FALLBACK_FILE, + DEFAULT_SERVER_DETAIL_LOCATION, + SCRIPTS_DIRECTORY, +) + +if TYPE_CHECKING: + from mephisto.abstractions.channel import Channel + from mephisto.data_model.packet import Packet + from mephisto.data_model.task_run import TaskRun + from mephisto.abstractions.database import MephistoDB + from mephisto.abstractions.blueprint import SharedTaskState + +from mephisto.operations.logger_core import get_logger + +logger = get_logger(name=__name__) + +ARCHITECT_TYPE = "ec2" +FINAL_SERVER_BUILD_DIRECTORY = "routing_server" +DEPLOY_WAIT_TIME = 3 + + +@dataclass +class EC2ArchitectArgs(ArchitectArgs): + """Additional arguments for configuring a heroku architect""" + + _architect_type: str = ARCHITECT_TYPE + instance_type: str = field( + default="t2.micro", metadata={"help": "Instance type to run router"} + ) + subdomain: str = field( + default="${mephisto.task.task_name}", + metadata={"help": "Subdomain name for routing"}, + ) + profile_name: str = field( + default=MISSING, metadata={"help": "Profile name for deploying an ec2 instance"} + ) + + +@register_mephisto_abstraction() +class EC2Architect(Architect): + """ + Sets up a server on heroku and deploys the task on that server + """ + + ArgsClass = EC2ArchitectArgs + ARCHITECT_TYPE = ARCHITECT_TYPE + + def __init__( + self, + db: "MephistoDB", + args: DictConfig, + shared_state: "SharedTaskState", + task_run: "TaskRun", + build_dir_root: str, + ): + """ + Create an architect with all required parameters for launch loaded + """ + self.args = args + self.task_run = task_run + with open(DEFAULT_FALLBACK_FILE, "r") as fallback_detail_file: + self.fallback_details = json.load(fallback_detail_file) + + self.subdomain = args.architect.subdomain + self.root_domain = self.fallback_details["domain"] + self.router_name = f"{self.subdomain}-routing-server" + self.full_domain = f"{self.subdomain}.{self.root_domain}" + self.server_source_path = args.architect.get("server_source_path", None) + self.instance_type = args.architect.instance_type + self.profile_name = args.architect.profile_name + self.server_type: str = args.architect.server_type + self.build_dir = build_dir_root + self.server_detail_path = os.path.join( + DEFAULT_SERVER_DETAIL_LOCATION, f"{self.subdomain}.json" + ) + + self.session = boto3.Session( + profile_name=self.profile_name, region_name="us-east-2" + ) + + self.server_dir: Optional[str] = None + self.server_id: Optional[str] = None + self.allocation_id: Optional[str] = None + self.association_id: Optional[str] = None + self.target_group_arn: Optional[str] = None + self.router_rule_arn: Optional[str] = None + self.created = False + + def _get_socket_urls(self) -> List[str]: + """Returns the path to the heroku app socket""" + return [f"wss://{self.full_domain}/"] + + def get_channels( + self, + on_channel_open: Callable[[str], None], + on_catastrophic_disconnect: Callable[[str], None], + on_message: Callable[[str, "Packet"], None], + ) -> List["Channel"]: + """ + Return a list of all relevant channels that the Supervisor will + need to register to in order to function + """ + urls = self._get_socket_urls() + return [ + WebsocketChannel( + f"ec2_channel_{self.subdomain}_{idx}", + on_channel_open=on_channel_open, + on_catastrophic_disconnect=on_catastrophic_disconnect, + on_message=on_message, + socket_url=url, + ) + for idx, url in enumerate(urls) + ] + + def download_file(self, target_filename: str, save_dir: str) -> None: + """ + Download the file from local storage + """ + target_url = f"https://{self.full_domain}/download_file/{target_filename}" + dest_path = os.path.join(save_dir, target_filename) + r = requests.get(target_url, stream=True) + + with open(dest_path, "wb") as out_file: + for chunk in r.iter_content(chunk_size=1024): + if chunk: + out_file.write(chunk) + + @classmethod + def assert_task_args(cls, args: DictConfig, shared_state: "SharedTaskState"): + """ + Assert that the given profile is already ready, that a fallback exists + and that all the configuration is ready + """ + profile_name = args.architect.profile_name + assert ec2_helpers.check_aws_credentials( + profile_name + ), "Given profile doesn't have registered credentials" + + subdomain = args.architect.subdomain + assert "." not in subdomain, "Not allowed to use . in subdomains" + # TODO assert only contains a-zA-Z\- + + # VALID_INSTANCES = [] + # assert args.architect.instance_type in VALID_INSTANCES + + assert os.path.exists( + DEFAULT_FALLBACK_FILE + ), "Must have fallback launched to use EC2 architect" + + with open(DEFAULT_FALLBACK_FILE, "r") as fallback_detail_file: + fallback_details = json.load(fallback_detail_file) + + REQUIRED_KEYS = [ + "key_pair_name", + "security_group_id", + "vpc_details", + "listener_arn", + ] + for key in REQUIRED_KEYS: + assert key in fallback_details, f"Fallback file missing required key {key}" + + session = boto3.Session(profile_name=profile_name, region_name="us-east-2") + assert ec2_helpers.rule_is_new( + session, subdomain, fallback_details["listener_arn"] + ) + + def __get_build_directory(self) -> str: + """ + Return the string where the server should be built in. + """ + return os.path.join( + self.build_dir, + FINAL_SERVER_BUILD_DIRECTORY, + ) + + def __compile_server(self) -> str: + """ + Move the required task files to a specific directory to be deployed to + ec2 directly. Return the location that the packaged files are + now prepared in. + """ + print("Building server files...") + server_build_root = self.__get_build_directory() + os.makedirs(server_build_root) + self.server_dir = server_dir = build_router( + server_build_root, + self.task_run, + version=self.server_type, + server_source_path=self.server_source_path, + ) + setup_path = os.path.join(SCRIPTS_DIRECTORY, self.server_type) + setup_dest = os.path.join(server_build_root, "setup") + shutil.copytree(setup_path, setup_dest) + possible_node_modules = os.path.join( + server_build_root, "router", "node_modules" + ) + if os.path.exists(possible_node_modules): + shutil.rmtree(possible_node_modules) + return server_dir + + def __setup_ec2_server(self) -> str: + """ + Deploy the server using the setup server directory, return the URL + """ + server_dir = os.path.abspath(self.__get_build_directory()) + + print("EC2: Starting instance...") + + # Launch server + ( + server_id, + self.allocation_id, + self.association_id, + ) = ec2_helpers.create_instance( + self.session, + self.fallback_details["key_pair_name"], + self.fallback_details["security_group_id"], + self.fallback_details["vpc_details"]["subnet_1_id"], + self.router_name, + instance_type=self.instance_type, + ) + self.server_id = server_id + + self.created = True + + print("EC2: Configuring routing table...") + # Configure router + ( + self.target_group_arn, + self.router_rule_arn, + ) = ec2_helpers.register_instance_to_listener( + self.session, + server_id, + self.fallback_details["vpc_details"]["vpc_id"], + self.fallback_details["listener_arn"], + self.full_domain, + ) + + # Write out details + server_details = { + "balancer_rule_arn": self.router_rule_arn, + "instance_id": self.server_id, + "ip_allocation_id": self.allocation_id, + "ip_association_id": self.association_id, + "subdomain": self.subdomain, + "target_group_arn": self.target_group_arn, + } + + with open(self.server_detail_path, "w+") as detail_file: + json.dump(server_details, detail_file) + + print("EC2: Deploying server...") + # Push server files and execute launch + ec2_helpers.deploy_to_routing_server( + self.session, + server_id, + self.fallback_details["key_pair_name"], + server_dir, + ) + + return f"https://{self.full_domain}" + + def __delete_ec2_server(self): + """ + Remove the heroku server associated with this task run + """ + server_id = self.server_id + assert server_id is not None, "Cannot shutdown a non-existent server" + print(f"Ec2: Deleting server: {self.server_id}") + if self.router_rule_arn is not None: + ec2_helpers.delete_rule( + self.session, + self.router_rule_arn, + self.target_group_arn, + ) + + ec2_helpers.delete_instance( + self.session, + server_id, + self.allocation_id, + self.association_id, + ) + os.unlink(self.server_detail_path) + + def server_is_running(self) -> bool: + """ + Utility function to check if the given heroku app (by app-name) is + still running + """ + return os.path.exists(self.server_detail_path) + + def build_is_clean(self) -> bool: + """ + Utility function to see if the build has been cleaned up + """ + server_dir = self.__get_build_directory() + return not os.path.exists(server_dir) + + def prepare(self) -> str: + """ + Produce the server files that will be deployed to the server + """ + return self.__compile_server() + + def deploy(self) -> str: + """ + Launch the server, and push the task files to the server. Return + the server URL + """ + return self.__setup_ec2_server() + + def cleanup(self) -> None: + """ + Remove any files that were used for the deployment process that + no longer need to be kept track of now that the task has + been launched. + """ + server_dir = self.__get_build_directory() + shutil.rmtree(server_dir) + + def shutdown(self) -> None: + """ + Shut down the server launched by this Architect, as stored + in the db. + """ + if self.created: # only delete the server if it's created by us + self.__delete_ec2_server() diff --git a/mephisto/abstractions/architects/ec2/ec2_helpers.py b/mephisto/abstractions/architects/ec2/ec2_helpers.py new file mode 100644 index 000000000..eef4293f8 --- /dev/null +++ b/mephisto/abstractions/architects/ec2/ec2_helpers.py @@ -0,0 +1,1086 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import boto3 # type: ignore +import botocore.exceptions # type: ignore +import time +import os +import subprocess +import json +from mephisto.abstractions.providers.mturk.mturk_utils import setup_aws_credentials +from mephisto.abstractions.architects.router import build_router + +from botocore import client # type: ignore +from botocore.exceptions import ClientError, ProfileNotFound # type: ignore +from botocore.config import Config # type: ignore +from mephisto.operations.logger_core import get_logger + +logger = get_logger(name=__name__) + + +from typing import Dict, Optional, Tuple, List, Any, TYPE_CHECKING + +if TYPE_CHECKING: + from omegaconf import DictConfig # type: ignore + +botoconfig = Config( + region_name="us-east-2", retries={"max_attempts": 10, "mode": "standard"} +) + +DEFAULT_AMI_ID = "ami-0f19d220602031aed" +AMI_DEFAULT_USER = "ec2-user" +DEFAULT_INSTANCE_TYPE = "m2.micro" +FALLBACK_INSTANCE_TYPE = "t2.nano" +MY_DIR = os.path.abspath(os.path.dirname(__file__)) +DEFAULT_KEY_PAIR_DIRECTORY = os.path.join(MY_DIR, "keypairs") +DEFAULT_SERVER_DETAIL_LOCATION = os.path.join(MY_DIR, "servers") +SCRIPTS_DIRECTORY = os.path.join(MY_DIR, "run_scripts") +DEFAULT_FALLBACK_FILE = os.path.join(DEFAULT_SERVER_DETAIL_LOCATION, "fallback.json") +FALLBACK_SERVER_LOC = os.path.join(MY_DIR, "fallback_server") +MAX_RETRIES = 10 + + +def check_aws_credentials(profile_name: str) -> bool: + try: + # Check existing credentials + boto3.Session(profile_name=profile_name) + return True + except ProfileNotFound: + return False + + +def setup_ec2_credentials( + profile_name: str, register_args: Optional["DictConfig"] = None +) -> bool: + return setup_aws_credentials(profile_name, register_args) + + +def get_domain_if_available(session: boto3.Session, domain_name: str) -> bool: + """ + Attempt to register the given domain with Route53, return + True if registration is successful, False otherwise. + + Details on valid domains can be found here: + https://docs.aws.amazon.com/Route53/latest/DeveloperGuide/registrar-tld-list.html + + Pricing is available on amazon + """ + client = session.client("route53domains") + avail_result = "PENDING" + + while avail_result == "PENDING": + avail = client.check_domain_availabiliity(DomainName=domain_name) + avail_result = avail["Availability"] + time.sleep(0.3) + + if avail_result not in ["AVAILABLE"]: # May extend to handle other available cases + print( + f"Domain was not listed as available, instead " + f"{avail_result}, visit route53 for more detail" + ) + return False + + print("Automated domain registration isn't yet implemented") + # Registration can be completed using client.register_domain + # Details are available here: + # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/route53domains.html#Route53Domains.Client.register_domain + + return False + + +def find_hosted_zone(session: boto3.Session, domain_name: str) -> Optional[str]: + """ + search for a hosted zone with the given name, return its id + if found and None otherwise + """ + client = session.client("route53") + + zones = client.list_hosted_zones_by_name() + + logger.debug(f"Found zones {zones}") + for zone in zones["HostedZones"]: + if zone["Name"] == f"{domain_name}.": + return zone["Id"] + + return None + + +def create_hosted_zone(session: boto3.Session, domain_name: str) -> str: + """ + Given a domain name, tries to create a hosted zone + for that domain. Returns the hosted zone id + """ + client = session.client("route53") + + zone_id = find_hosted_zone(session, domain_name) + if zone_id is None: + + res = client.create_hosted_zone( + Name=domain_name, + CallerReference=str(time.time()), + HostedZoneConfig={ + "Comment": "Mephisto hosted zone", + }, + ) + nameservers = res["DelegationSet"]["NameServers"] + BOLD_WHITE_ON_BLUE = "\x1b[1;37;44m" + RESET = "\x1b[0m" + print( + f"{BOLD_WHITE_ON_BLUE}" + "Registered new hosted zone! You should ensure your domain " + "name is registered to delegate to the following nameservers: " + f"\n{nameservers}" + f"{RESET}" + ) + zone_id = res["HostedZone"]["Id"] + else: + logger.debug(f"This hosted zone already exists! Returning {zone_id}") + + return zone_id + + +def find_certificate_arn(session: boto3.Session, domain_name: str) -> Optional[str]: + """ + Finds the certificate for the given domain if it exists, and returns + the certification arn. + """ + client = session.client("acm") + certs = client.list_certificates() + logger.debug(f"Found existing certs: {certs}") + for cert in certs["CertificateSummaryList"]: + if cert["DomainName"] == domain_name: + return cert["CertificateArn"] + return None + + +def get_certificate(session: boto3.Session, domain_name: str) -> Dict[str, str]: + """ + Gets the certificate for the given domain name, and returns + the dns validation name and target and cert arn ('Name' and 'Value', 'arn') + """ + client = session.client("acm") + cert_domain_name = f"*.{domain_name}" + certificate_arn = find_certificate_arn(session, cert_domain_name) + if certificate_arn is None: # cert not yet issued + logger.debug("Requesting new certificate") + response = client.request_certificate( + DomainName=cert_domain_name, + ValidationMethod="DNS", + IdempotencyToken=f"{domain_name.split('.')[0]}request", + Options={ + "CertificateTransparencyLoggingPreference": "ENABLED", + }, + ) + certificate_arn = response["CertificateArn"] + else: + logger.debug(f"Using existing certificate {certificate_arn}") + attempts = 0 + sleep_time = 2 + details = None + while attempts < MAX_RETRIES: + try: + details = client.describe_certificate( + CertificateArn=certificate_arn, + ) + return_data = details["Certificate"]["DomainValidationOptions"][0][ + "ResourceRecord" + ] + return_data["arn"] = certificate_arn + return return_data + except KeyError: + # Resource record not created yet, try again + attempts += 1 + logger.info(f"Attempt {attempts} had no certification details, retrying") + time.sleep(sleep_time) + sleep_time *= 2 + raise Exception("Exceeded MAX_RETRIES waiting for certificate records") + + +def register_zone_records( + session: boto3.Session, + zone_id: str, + domain_name: str, + load_balancer_arn: str, + acm_valid_name: str, + acm_valid_target: str, +) -> int: + """ + Creates the required zone records for this mephisto hosted zone. Requires + the load balancer target, and the ACM certificate addresses + + Returns the change id + """ + # Get details about the load balancer + ec2_client = session.client("elbv2") + balancer = ec2_client.describe_load_balancers( + LoadBalancerArns=[load_balancer_arn], + )["LoadBalancers"][0] + load_balancer_dns = balancer["DNSName"] + load_balancer_zone = balancer["CanonicalHostedZoneId"] + + # Create the records + client = session.client("route53") + response = client.change_resource_record_sets( + HostedZoneId=zone_id, + ChangeBatch={ + "Comment": "Creating records for Mephisto load balancer and DNS validations for certs", + "Changes": [ + { + "Action": "CREATE", + "ResourceRecordSet": { + "Name": f"*.{domain_name}", + "Type": "A", + "AliasTarget": { + "HostedZoneId": load_balancer_zone, + "DNSName": load_balancer_dns, + "EvaluateTargetHealth": True, + }, + }, + }, + { + "Action": "CREATE", + "ResourceRecordSet": { + "Name": f"{domain_name}", + "Type": "A", + "AliasTarget": { + "HostedZoneId": load_balancer_zone, + "DNSName": load_balancer_dns, + "EvaluateTargetHealth": True, + }, + }, + }, + { + "Action": "CREATE", + "ResourceRecordSet": { + "Name": acm_valid_name, + "Type": "CNAME", + "TTL": 300, + "ResourceRecords": [ + {"Value": acm_valid_target}, + ], + }, + }, + ], + }, + ) + return response["ChangeInfo"]["Id"] + + +def create_mephisto_vpc(session: boto3.Session) -> Dict[str, str]: + """ + Create the required vpc with two subnets, an associated + internet gateway, and routing tables. + + Currently sets up using US-east for both subnets + """ + client = session.client("ec2") + + # Create VPC + vpc_response = client.create_vpc( + CidrBlock="10.0.0.0/16", + TagSpecifications=[ + { + "ResourceType": "vpc", + "Tags": [{"Key": "Name", "Value": "mephisto-core-vpc"}], + } + ], + ) + vpc_id = vpc_response["Vpc"]["VpcId"] + + # Create internet gateway + gateway_response = client.create_internet_gateway( + TagSpecifications=[ + { + "ResourceType": "internet-gateway", + "Tags": [{"Key": "Name", "Value": "mephisto-gateway"}], + } + ], + ) + gateway_id = gateway_response["InternetGateway"]["InternetGatewayId"] + client.attach_internet_gateway( + InternetGatewayId=gateway_id, + VpcId=vpc_id, + ) + + # Create subnets + subnet_1_response = client.create_subnet( + TagSpecifications=[ + { + "ResourceType": "subnet", + "Tags": [{"Key": "Name", "Value": "mephisto-subnet-1"}], + } + ], + CidrBlock="10.0.0.0/24", + AvailabilityZone="us-east-2a", + VpcId=vpc_id, + ) + subnet_1_id = subnet_1_response["Subnet"]["SubnetId"] + + subnet_2_response = client.create_subnet( + TagSpecifications=[ + { + "ResourceType": "subnet", + "Tags": [{"Key": "Name", "Value": "mephisto-subnet-2"}], + } + ], + CidrBlock="10.0.1.0/24", + AvailabilityZone="us-east-2b", + VpcId=vpc_id, + ) + subnet_2_id = subnet_2_response["Subnet"]["SubnetId"] + + # Create routing tables + table_1_response = client.create_route_table( + TagSpecifications=[ + { + "ResourceType": "route-table", + "Tags": [{"Key": "Name", "Value": "mephisto-routes-1"}], + } + ], + VpcId=vpc_id, + ) + route_table_1_id = table_1_response["RouteTable"]["RouteTableId"] + + table_2_response = client.create_route_table( + TagSpecifications=[ + { + "ResourceType": "route-table", + "Tags": [{"Key": "Name", "Value": "mephisto-routes-2"}], + } + ], + VpcId=vpc_id, + ) + route_table_2_id = table_2_response["RouteTable"]["RouteTableId"] + + # Add routes in tables to gateway + client.create_route( + DestinationCidrBlock="0.0.0.0/0", + GatewayId=gateway_id, + RouteTableId=route_table_1_id, + ) + client.create_route( + DestinationCidrBlock="0.0.0.0/0", + GatewayId=gateway_id, + RouteTableId=route_table_2_id, + ) + + # Associate routing tables + client.associate_route_table( + RouteTableId=route_table_1_id, + SubnetId=subnet_1_id, + ) + client.associate_route_table( + RouteTableId=route_table_2_id, + SubnetId=subnet_2_id, + ) + + return { + "vpc_id": vpc_id, + "gateway_id": gateway_id, + "subnet_1_id": subnet_1_id, + "subnet_2_id": subnet_2_id, + "route_1_id": route_table_1_id, + "route_2_id": route_table_2_id, + } + + +def create_security_group(session: boto3.Session, vpc_id: str, ssh_ip: str) -> str: + """ + Create a security group with public access + for 80 and 443, but only access from ssh_ip (comma-separated) for 22 + """ + client = session.client("ec2") + + create_response = client.create_security_group( + Description="Security group used for Mephisto host servers", + GroupName="mephisto-server-security-group", + VpcId=vpc_id, + TagSpecifications=[ + { + "ResourceType": "security-group", + "Tags": [{"Key": "Name", "Value": "mephisto-server-security-group"}], + } + ], + ) + group_id = create_response["GroupId"] + ssh_perms = [ + { + "FromPort": 22, + "ToPort": 22, + "IpProtocol": "tcp", + "IpRanges": [ + { + "CidrIp": one_ip, + "Description": "SSH from allowed ip", + } + ], + } + for one_ip in ssh_ip.split(",") + ] + + response = client.authorize_security_group_ingress( + GroupId=group_id, + IpPermissions=[ + { + "FromPort": 80, + "ToPort": 80, + "IpProtocol": "tcp", + "IpRanges": [ + { + "CidrIp": "0.0.0.0/0", + "Description": "Public insecure http access", + } + ], + }, + { + "FromPort": 80, + "ToPort": 80, + "IpProtocol": "tcp", + "Ipv6Ranges": [ + { + "CidrIpv6": "::/0", + "Description": "Public insecure http access", + } + ], + }, + { + "FromPort": 5000, + "ToPort": 5000, + "IpProtocol": "tcp", + "IpRanges": [ + { + "CidrIp": "0.0.0.0/0", + "Description": "Internal router access", + } + ], + }, + { + "FromPort": 5000, + "ToPort": 5000, + "IpProtocol": "tcp", + "Ipv6Ranges": [ + { + "CidrIpv6": "::/0", + "Description": "Internal router access", + } + ], + }, + { + "FromPort": 443, + "ToPort": 443, + "IpProtocol": "tcp", + "IpRanges": [ + { + "CidrIp": "0.0.0.0/0", + "Description": "Public secure http access", + } + ], + }, + { + "FromPort": 443, + "ToPort": 443, + "IpProtocol": "tcp", + "Ipv6Ranges": [ + { + "CidrIpv6": "::/0", + "Description": "Public secure http access", + } + ], + }, + ] + + ssh_perms, + ) + + assert response["ResponseMetadata"]["HTTPStatusCode"] == 200 + return group_id + + +def create_key_pair( + session: boto3.Session, + key_name: str, + key_pair_dir: str = DEFAULT_KEY_PAIR_DIRECTORY, +) -> str: + """ + creates a key pair by the given name, and writes it to file + """ + target_keypair_filename = os.path.join(key_pair_dir, f"{key_name}.pem") + if os.path.exists(target_keypair_filename): + logger.warning(f"Keypair already exists! {target_keypair_filename}") + return target_keypair_filename + client = session.client("ec2") + + response = client.create_key_pair( + KeyName=key_name, + TagSpecifications=[ + {"ResourceType": "key-pair", "Tags": [{"Key": "Name", "Value": key_name}]} + ], + ) + with open(target_keypair_filename, "w+") as keypair_file: + keypair_file.write(response["KeyMaterial"]) + subprocess.check_call(["chmod", "400", target_keypair_filename]) + + return target_keypair_filename + + +def create_instance( + session: boto3.Session, + key_pair_name: str, + security_group_id: str, + subnet_id: str, + instance_name: str, + volume_size: int = 8, + instance_type: str = DEFAULT_INSTANCE_TYPE, +) -> Tuple[str, str, str]: + """ + Create an instance, return the instance id, allocation id, and association id + """ + client = session.client("ec2") + instance_response = client.run_instances( + BlockDeviceMappings=[ + { + "DeviceName": "/dev/xvda", + "Ebs": { + "DeleteOnTermination": True, + "VolumeSize": volume_size, + "VolumeType": "gp2", + "Encrypted": True, + }, + } + ], + ImageId=DEFAULT_AMI_ID, + InstanceType=instance_type, + KeyName=key_pair_name, + MaxCount=1, + MinCount=1, + Monitoring={ + "Enabled": False, # standard monitoring is enough + }, + Placement={ + "Tenancy": "default", + }, + SecurityGroupIds=[security_group_id], + SubnetId=subnet_id, + DisableApiTermination=False, # we need to allow shutdown from botocore + # IamInstanceProfile={ # Maybe we can move the iam role to do rest of registration? + # 'Arn': 'string', + # 'Name': 'string' + # }, + InstanceInitiatedShutdownBehavior="stop", + TagSpecifications=[ + { + "ResourceType": "instance", + "Tags": [ + { + "Key": "Name", + "Value": instance_name, + }, + ], + }, + ], + HibernationOptions={"Configured": False}, + MetadataOptions={ + "HttpTokens": "optional", + "HttpEndpoint": "enabled", + }, + EnclaveOptions={"Enabled": False}, + ) + instance_id = instance_response["Instances"][0]["InstanceId"] + + allocation_response = client.allocate_address( + Domain="vpc", + TagSpecifications=[ + { + "ResourceType": "elastic-ip", + "Tags": [ + { + "Key": "Name", + "Value": f"{instance_name}-ip-address", + } + ], + } + ], + ) + allocation_id = allocation_response["AllocationId"] + + logger.debug(f"Waiting for instance {instance_id} to come up before continuing") + waiter = client.get_waiter("instance_running") + waiter.wait( + InstanceIds=[instance_id], + ) + + associate_response = client.associate_address( + AllocationId=allocation_id, + InstanceId=instance_id, + AllowReassociation=False, + ) + association_id = associate_response["AssociationId"] + + return instance_id, allocation_id, association_id + + +def create_target_group( + session: boto3.Session, + vpc_id: str, + instance_id: str, + group_name="mephisto-fallback-group", +) -> str: + """ + Create a target group for the given instance + """ + client = session.client("elbv2") + create_target_response = client.create_target_group( + Name=group_name[:32], + Protocol="HTTP", + ProtocolVersion="HTTP1", + Port=5000, + VpcId=vpc_id, + Matcher={ + "HttpCode": "200-299", + }, + TargetType="instance", + Tags=[ + {"Key": "string", "Value": "string"}, + ], + ) + target_group_arn = create_target_response["TargetGroups"][0]["TargetGroupArn"] + + client.register_targets( + TargetGroupArn=target_group_arn, + Targets=[ + { + "Id": instance_id, + } + ], + ) + + return target_group_arn + + +def rule_is_new( + session: boto3.Session, + subdomain: str, + listener_arn: str, +) -> bool: + """ + Check to see if a rule already exists with the given subdomain + """ + client = session.client("elbv2") + find_rule_response = client.describe_rules( + ListenerArn=listener_arn, + ) + rules = find_rule_response["Rules"] + for rule in rules: + if len(rule["Conditions"]) == 0: + continue # base rule + host_condition = rule["Conditions"][0] + values = host_condition.get("Values") + if values is None or len(values) == 0: + values = host_condition["HostHeaderConfig"]["Values"] + existing = values[0] + if existing.startswith(f"{subdomain}."): + return False + + return True + + +def register_instance_to_listener( + session: boto3.Session, + instance_id: str, + vpc_id: str, + listener_arn: str, + domain: str, +) -> Tuple[str, str]: + """ + Creates a rule for this specific redirect case, + and returns the target group id and rule arn + """ + subdomain_root = domain.split(".")[0] + target_group_arn = create_target_group( + session, vpc_id, instance_id, f"{subdomain_root[:28]}-tg" + ) + client = session.client("elbv2") + + find_rule_response = client.describe_rules( + ListenerArn=listener_arn, + ) + rule_count = len(find_rule_response["Rules"]) + + rule_response = client.create_rule( + ListenerArn=listener_arn, + Conditions=[ + { + "Field": "host-header", + "HostHeaderConfig": { + "Values": [ + domain, + f"*.{domain}", + ], + }, + }, + ], + Priority=rule_count + 1, + Actions=[ + { + "Type": "forward", + "TargetGroupArn": target_group_arn, + }, + ], + ) + rule_arn = rule_response["Rules"][0]["RuleArn"] + + return target_group_arn, rule_arn + + +def create_load_balancer( + session: boto3.Session, + subnet_ids: List[str], + security_group_id: str, + vpc_id: str, +) -> str: + """ + Creates a load balancer and returns the balancer's arn + """ + client = session.client("elbv2") + + create_response = client.create_load_balancer( + Name="mephisto-hosts-balancer", + Subnets=subnet_ids, + SecurityGroups=[security_group_id], + Scheme="internet-facing", + Type="application", + IpAddressType="ipv4", + ) + balancer_arn = create_response["LoadBalancers"][0]["LoadBalancerArn"] + return balancer_arn + + +def configure_base_balancer( + session: boto3.Session, + balancer_arn: str, + certificate_arn: str, + target_group_arn: str, +) -> str: + """ + Configure the default rules for this load balancer. Return the id + of the listener to add rules to for redirecting to specified target groups + """ + + client = session.client("elbv2") + + _redirect_response = client.create_listener( + LoadBalancerArn=balancer_arn, + Protocol="HTTP", + Port=80, + DefaultActions=[ + { + "Type": "redirect", + "RedirectConfig": { + "Protocol": "HTTPS", + "Port": "443", + "Host": "#{host}", + "Path": "/#{path}", + "Query": "#{query}", + "StatusCode": "HTTP_301", + }, + } + ], + ) + + forward_response = client.create_listener( + LoadBalancerArn=balancer_arn, + Protocol="HTTPS", + Port=443, + SslPolicy="ELBSecurityPolicy-2016-08", + Certificates=[ + { + "CertificateArn": certificate_arn, + } + ], + DefaultActions=[ + { + "Type": "forward", + "TargetGroupArn": target_group_arn, + } + ], + ) + listener_arn = forward_response["Listeners"][0]["ListenerArn"] + return listener_arn + + +def deploy_fallback_server( + session: boto3.Session, + instance_id: str, + key_pair: str, + log_access_pass: str, +) -> bool: + """ + Deploy the fallback server to the given instance, + return True if successful + """ + client = session.client("ec2") + server_response = client.describe_instances(InstanceIds=[instance_id]) + server_host = server_response["Reservations"][0]["Instances"][0]["PublicIpAddress"] + keypair_file = os.path.join(DEFAULT_KEY_PAIR_DIRECTORY, f"{key_pair}.pem") + password_file_name = os.path.join(FALLBACK_SERVER_LOC, f"access_key.txt") + with open(password_file_name, "w+") as password_file: + password_file.write(log_access_pass) + + remote_server = f"{AMI_DEFAULT_USER}@{server_host}" + + dest = f"{remote_server}:/home/ec2-user/" + subprocess.check_call( + [ + "scp", + "-o", + "StrictHostKeyChecking=no", + "-i", + keypair_file, + "-r", + f"{FALLBACK_SERVER_LOC}", + dest, + ] + ) + subprocess.check_call( + [ + "ssh", + "-i", + keypair_file, + remote_server, + "bash", + "/home/ec2-user/fallback_server/scripts/first_setup.sh", + ] + ) + + os.unlink(password_file_name) + return True + + +def deploy_to_routing_server( + session: boto3.Session, + instance_id: str, + key_pair: str, + push_directory: str, +) -> bool: + client = session.client("ec2") + server_response = client.describe_instances(InstanceIds=[instance_id]) + server_host = server_response["Reservations"][0]["Instances"][0]["PublicIpAddress"] + keypair_file = os.path.join(DEFAULT_KEY_PAIR_DIRECTORY, f"{key_pair}.pem") + + remote_server = f"{AMI_DEFAULT_USER}@{server_host}" + + print("Uploading files to server, then attempting to run") + dest = f"{remote_server}:/home/ec2-user/" + retries = 5 + sleep_time = 10.0 + while retries > 0: + try: + subprocess.check_call( + [ + "scp", + "-o", + "StrictHostKeyChecking=no", + "-i", + keypair_file, + "-r", + f"{push_directory}", + dest, + ] + ) + break + except subprocess.CalledProcessError: + retries -= 1 + sleep_time *= 1.5 + logger.info( + f"Timed out trying to push to server. Retries remaining: {retries}" + ) + time.sleep(sleep_time) + subprocess.check_call( + [ + "ssh", + "-i", + keypair_file, + remote_server, + "bash", + "/home/ec2-user/routing_server/setup/init_server.sh", + ] + ) + print("Server setup complete!") + + return True + + +def delete_rule( + session: boto3.Session, + rule_arn: str, + target_group_arn: str, +) -> None: + """ + Remove the given rule and the target group for this rule + """ + client = session.client("elbv2") + client.delete_rule( + RuleArn=rule_arn, + ) + + client.delete_target_group( + TargetGroupArn=target_group_arn, + ) + + +def delete_instance( + session: boto3.Session, + instance_id: str, + allocation_id: str, + association_id: str, +) -> None: + """ + Remove the given instance and the associated elastic ip + """ + client = session.client("ec2") + client.disassociate_address( + AssociationId=association_id, + ) + + client.release_address( + AllocationId=allocation_id, + ) + + client.terminate_instances(InstanceIds=[instance_id]) + + +def remove_instance_and_cleanup( + session: boto3.Session, + server_name: str, +) -> None: + """ + Cleanup for a launched server, removing the redirect rule + clearing the target group, and then shutting down the instance. + """ + server_detail_path = os.path.join( + DEFAULT_SERVER_DETAIL_LOCATION, f"{server_name}.json" + ) + + with open(server_detail_path, "r") as detail_file: + details = json.load(detail_file) + + delete_rule(session, details["balancer_rule_arn"], details["target_group_arn"]) + delete_instance( + session, + details["instance_id"], + details["ip_allocation_id"], + details["ip_association_id"], + ) + os.unlink(server_detail_path) + return None + + +def delete_listener( + session: boto3.Session, + listener_arn: str, +) -> None: + client = session.client("elbv2") + client.delete_listener( + ListenerArn=listener_arn, + ) + + +def cleanup_fallback_server( + iam_profile: str, + delete_hosted_zone: bool = False, + server_details_file: str = DEFAULT_FALLBACK_FILE, +) -> None: + """ + Cleans up all of the resources for the given iam profile, + assuming that the details are stored in the given + server_details_file. + + Optionally includes deleting the hosted zone, which remains + an option due to the DNS changes required + """ + session = boto3.Session(profile_name=iam_profile, region_name="us-east-2") + + elb_client = session.client("elbv2") + ec2_client = session.client("ec2") + + server_details_file = ( + DEFAULT_FALLBACK_FILE if server_details_file is None else server_details_file + ) + with open(server_details_file, "r") as details_file: + details = json.load(details_file) + + listener_arn = details.get("listener_arn") + if listener_arn is not None: + print(f"Deleting listener {listener_arn}...") + elb_client.delete_listener( + ListenerArn=listener_arn, + ) + + target_group_arn = details.get("target_group_arn") + if target_group_arn is not None: + print(f"Deleting target group {target_group_arn}...") + elb_client.delete_target_group( + TargetGroupArn=target_group_arn, + ) + + balancer_arn = details.get("balancer_arn") + if balancer_arn is not None: + print(f"Deleting balancer {balancer_arn}...") + elb_client.delete_load_balancer( + LoadBalancerArn=balancer_arn, + ) + + instance_id = details.get("instance_id") + ip_allocation_id = details.get("ip_allocation_id") + ip_association_id = details.get("ip_association_id") + if instance_id is not None: + print(f"Deleting instance {instance_id}...") + delete_instance(session, instance_id, ip_allocation_id, ip_association_id) + + vpc_details = details.get("vpc_details") + if vpc_details is not None: + print(f"Deleting vpc {vpc_details['vpc_id']} and related resources...") + ec2_client.delete_subnet(SubnetId=vpc_details["subnet_1_id"]) + ec2_client.delete_subnet(SubnetId=vpc_details["subnet_2_id"]) + ec2_client.delete_route_table(RouteTableId=vpc_details["route_1_id"]) + ec2_client.delete_route_table(RouteTableId=vpc_details["route_2_id"]) + table_response = ec2_client.describe_route_tables( + Filters=[ + { + "Name": "vpc-id", + "Values": [vpc_details["vpc_id"]], + } + ] + ) + tables = table_response["RouteTables"] + for table in tables: + ec2_client.delete_route_table(RouteTableId=table["RouteTableId"]) + + ec2_client.delete_internet_gateway(InternetGatewayId=vpc_details["gateway_id"]) + + security_group_id = details.get("security_group_id") + if security_group_id is not None: + print("Deleting security group {security_group_id}...") + ec2_client.delete_security_group( + GroupId=security_group_id, + ) + + ec2_client.delete_vpc(VpcId=vpc_details["vpc_id"]) + + if delete_hosted_zone: + hosted_zone_id = details.get("hosted_zone_id") + if hosted_zone_id is not None: + route53_client = session.client("route53") + print( + "Deleting hosted zones not yet implemented, " + "navigate to the AWS Route53 console to complete " + f"this step, deleting {hosted_zone_id}" + ) + # To delete a hosted zone, we need to query it + # for the list of records, than remove all + # that aren't SOA/NS + + os.unlink(server_details_file) + return None diff --git a/mephisto/abstractions/architects/ec2/fallback_server/README.md b/mephisto/abstractions/architects/ec2/fallback_server/README.md new file mode 100644 index 000000000..01ed796da --- /dev/null +++ b/mephisto/abstractions/architects/ec2/fallback_server/README.md @@ -0,0 +1,7 @@ +# Mephisto Fallback server +The goal of this server is to catch events from requests trying to hit a Mephisto task server that is no longer running. + +Any access from any subdomain will be provided the error page, and we log the access domain as well as path params with a timestamp. + +### Viewing logs +Access to the path `view_logs` with the url param `access_key` set to the same key as is present in the server's `access_key.txt` file will instead return a json of all of the logged events. You can provide an optional `timestamp` parameter to only return events after that timestamp. \ No newline at end of file diff --git a/mephisto/abstractions/architects/ec2/fallback_server/access_logs/README.md b/mephisto/abstractions/architects/ec2/fallback_server/access_logs/README.md new file mode 100644 index 000000000..f3a231279 --- /dev/null +++ b/mephisto/abstractions/architects/ec2/fallback_server/access_logs/README.md @@ -0,0 +1,5 @@ +# Access Logs + +This directory is used to hold any access logs written to the server. + +Logs are written in the filename format `"{host}-{curr_time}-access.json"` \ No newline at end of file diff --git a/mephisto/abstractions/architects/ec2/fallback_server/app.py b/mephisto/abstractions/architects/ec2/fallback_server/app.py new file mode 100644 index 000000000..3ebb7fa1a --- /dev/null +++ b/mephisto/abstractions/architects/ec2/fallback_server/app.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import os +import json +import time +from flask import Flask, request, render_template +from urllib.parse import urlparse +from gevent.pywsgi import WSGIServer + +app = Flask(__name__) + +MY_DIR = os.path.abspath(os.path.dirname(__file__)) +LOG_BASE = os.path.join(MY_DIR, "access_logs") +PASSWORD_FILE = os.path.join(MY_DIR, "access_key.txt") +with open(PASSWORD_FILE, "r") as password_file: + PASSWORD = password_file.read().strip() + + +@app.route("/view_logs") +def get_details(): + args = request.args + if args.get("access_key") != PASSWORD: + return main_route("view_logs") + + timestamp = args.get("timestamp", 0) + found_logs = [] + for log_path in os.listdir(LOG_BASE): + if not log_path.endswith(".json"): + continue + with open(os.path.join(LOG_BASE, log_path), "r") as log_file: + data = json.load(log_file) + if data["timestamp"] > timestamp: + found_logs.append(data) + + return {"logs": found_logs} + + +@app.route("/", defaults={"path": ""}) +@app.route("/") +def main_route(path): + args = dict(request.args) + o = urlparse(request.base_url) + host = o.hostname + curr_time = time.time() + if not host.startswith("10."): + filename = f"{host}-{curr_time}-access.json" + access_log = { + "timestamp": curr_time, + "args": args, + "host": host, + } + with open(os.path.join(LOG_BASE, filename), "w+") as log_file: + json.dump(access_log, log_file) + + return render_template("landing.html") + + +http_server = WSGIServer(("", 5000), app) +print("Launching server!", http_server) +http_server.serve_forever() diff --git a/mephisto/abstractions/architects/ec2/fallback_server/fallback.service b/mephisto/abstractions/architects/ec2/fallback_server/fallback.service new file mode 100644 index 000000000..c000c515d --- /dev/null +++ b/mephisto/abstractions/architects/ec2/fallback_server/fallback.service @@ -0,0 +1,12 @@ +[Unit] +After=network.service +Description=Fallback server application + +[Service] +Type=simple +User=ec2-user +Restart=always +ExecStart=/home/ec2-user/fallback_server/scripts/run_server.sh + +[Install] +WantedBy=multi-user.target diff --git a/mephisto/abstractions/architects/ec2/fallback_server/scripts/first_setup.sh b/mephisto/abstractions/architects/ec2/fallback_server/scripts/first_setup.sh new file mode 100644 index 000000000..6fca95313 --- /dev/null +++ b/mephisto/abstractions/architects/ec2/fallback_server/scripts/first_setup.sh @@ -0,0 +1,15 @@ +echo "Installing requirements" +sudo yum update -y >> /home/ec2-user/setup_log.txt 2>&1 +sudo yum install -y httpd >> /home/ec2-user/setup_log.txt 2>&1 + +sudo -H pip3 install flask gevent >> /home/ec2-user/setup_log.txt 2>&1 + +echo "Preparing service..." +sudo cp /home/ec2-user/fallback_server/fallback.service /etc/systemd/system/fallback.service >> /home/ec2-user/setup_log.txt 2>&1 +sudo chmod 744 /home/ec2-user/fallback_server/scripts/run_server.sh >> /home/ec2-user/setup_log.txt 2>&1 +sudo chmod 664 /etc/systemd/system/fallback.service >> /home/ec2-user/setup_log.txt 2>&1 + +echo "Launching service..." +sudo systemctl daemon-reload >> /home/ec2-user/setup_log.txt 2>&1 +sudo systemctl enable fallback.service >> /home/ec2-user/setup_log.txt 2>&1 +sudo systemctl start fallback.service >> /home/ec2-user/setup_log.txt 2>&1 diff --git a/mephisto/abstractions/architects/ec2/fallback_server/scripts/run_server.sh b/mephisto/abstractions/architects/ec2/fallback_server/scripts/run_server.sh new file mode 100644 index 000000000..7560b419d --- /dev/null +++ b/mephisto/abstractions/architects/ec2/fallback_server/scripts/run_server.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +cd /home/ec2-user +which python3 +python3 /home/ec2-user/fallback_server/app.py diff --git a/mephisto/abstractions/architects/ec2/fallback_server/templates/landing.html b/mephisto/abstractions/architects/ec2/fallback_server/templates/landing.html new file mode 100644 index 000000000..5989c3946 --- /dev/null +++ b/mephisto/abstractions/architects/ec2/fallback_server/templates/landing.html @@ -0,0 +1,33 @@ + + + Task content missing! + + +
+
+

+ Oops! Something went wrong with this task... +

+

+ If you're seeing this page, it's because the server that was hosting + the task that you were trying to reach is no longer accessible. Often + this happens when the author of the task requests it to be taken down + but the request failed, or if something goes severely wrong with the + task setup. +

+

+ If you were intending on seeing a task here, we've logged and notified + the task owner so that this outstanding task can be taken down. Sorry + for the inconvenience, and thank you for your work! +

+
+
+ + diff --git a/mephisto/abstractions/architects/ec2/keypairs/README.md b/mephisto/abstractions/architects/ec2/keypairs/README.md new file mode 100644 index 000000000..094423e66 --- /dev/null +++ b/mephisto/abstractions/architects/ec2/keypairs/README.md @@ -0,0 +1,6 @@ +# Default storage for Mephisto Keypairs +This directory is used as the default storage for mephisto keypairs + +you likely should never commit anything that is saved in this folder. + +In the future, we will make it such that committing things in this folder will result in a failed commit. \ No newline at end of file diff --git a/mephisto/abstractions/architects/ec2/prepare_ec2_servers.py b/mephisto/abstractions/architects/ec2/prepare_ec2_servers.py new file mode 100644 index 000000000..0279ab0b9 --- /dev/null +++ b/mephisto/abstractions/architects/ec2/prepare_ec2_servers.py @@ -0,0 +1,227 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import mephisto.abstractions.architects.ec2.ec2_helpers as ec2_helpers +from mephisto.abstractions.architects.ec2.ec2_helpers import ( + DEFAULT_FALLBACK_FILE, + FALLBACK_INSTANCE_TYPE, +) +import boto3 # type: ignore +import os +import json + +from typing import Dict, Any + +DEFAULT_KEY_PAIR_NAME = "mephisto-server-key" + + +def update_details( + open_file, + new_data: Dict[str, Any], +): + """ + Overwrite the contents of the open file with the given data. + """ + open_file.seek(0) + open_file.truncate(0) + json.dump(new_data, open_file, sort_keys=True, indent=4) + + +def launch_ec2_fallback( + iam_profile: str, # Iam role name, should be saved in aws credentials + domain_name: str, + ssh_ip_block: str, + access_logs_key: str, + key_pair_name: str = DEFAULT_KEY_PAIR_NAME, + server_details_file: str = DEFAULT_FALLBACK_FILE, + instance_type: str = FALLBACK_INSTANCE_TYPE, +) -> Dict[str, Any]: + """ + This function is used to set up a mephisto + vpc and fallback server for the AWS setup. At the moment + it requires that you already have a domain registered, + and it is up to you to delegate the domain to the + amazon nameservers created by this function. This + function will request the ssl certificate from amazon + + At the moment, it only works on the us-east region. + Feel free to open a PR to extend this functionality + if you need another region! + """ + assert not domain_name.startswith("www."), ( + "You should provide a domain name without www, like 'example.com', " + "or 'crowdsourcing.example.com'" + ) + key_pair_name = DEFAULT_KEY_PAIR_NAME if key_pair_name is None else key_pair_name + server_details_file = ( + DEFAULT_FALLBACK_FILE if server_details_file is None else server_details_file + ) + instance_type = FALLBACK_INSTANCE_TYPE if instance_type is None else instance_type + + session = boto3.Session(profile_name=iam_profile, region_name="us-east-2") + + try: + with open(server_details_file, "r") as saved_details_file: + existing_details = json.load(saved_details_file) + except: + existing_details = {"domain": domain_name, "cidr": ssh_ip_block} + + with open(server_details_file, "w+") as saved_details_file: + # Get a ssl certificate for the domain + cert_details = existing_details.get("cert_details") + if cert_details is None: + print("Getting a certificate for the given domain...") + cert_details = ec2_helpers.get_certificate(session, domain_name) + existing_details["cert_details"] = cert_details + update_details(saved_details_file, existing_details) + else: + print("Using existing certificate") + + # Create a hosted zone for the given domain + hosted_zone_id = existing_details.get("hosted_zone_id") + if hosted_zone_id is None: + print("Creating hosted zone for the given domain...") + hosted_zone_id = ec2_helpers.create_hosted_zone(session, domain_name) + existing_details["hosted_zone_id"] = hosted_zone_id + update_details(saved_details_file, existing_details) + else: + print(f"Using existing hosted zone {hosted_zone_id}") + + # Create the VPC to hold the servers + vpc_details = existing_details.get("vpc_details") + if vpc_details is None: + print("Initializing VPC...") + vpc_details = ec2_helpers.create_mephisto_vpc(session) + existing_details["vpc_details"] = vpc_details + update_details(saved_details_file, existing_details) + else: + print(f"Using existing vpc {vpc_details['vpc_id']}") + + # Set up a security group for everything + security_group_id = existing_details.get("security_group_id") + if security_group_id is None: + print("Creating security group...") + security_group_id = ec2_helpers.create_security_group( + session, vpc_details["vpc_id"], ssh_ip_block + ) + existing_details["security_group_id"] = security_group_id + update_details(saved_details_file, existing_details) + else: + print(f"Using existing security group {security_group_id}") + + # Create a keypair for the server + key_pair_filename = existing_details.get("key_pair_filename") + if key_pair_filename is None: + print(f"Generating keypair named {key_pair_name}") + key_pair_filename = ec2_helpers.create_key_pair(session, key_pair_name) + existing_details["key_pair_filename"] = key_pair_filename + update_details(saved_details_file, existing_details) + else: + print(f"Using existing keypair at {key_pair_filename}") + + # Create the instance running the fallback server + instance_id = existing_details.get("instance_id") + ip_allocation_id = existing_details.get("ip_allocation_id") + ip_association_id = existing_details.get("ip_association_id") + if instance_id is None: + print("Creating a new instance for fallback server...") + ( + instance_id, + ip_allocation_id, + ip_association_id, + ) = ec2_helpers.create_instance( + session, + key_pair_name, + security_group_id, + vpc_details["subnet_1_id"], + "mephisto-default-fallover", + instance_type=instance_type, + ) + existing_details["instance_id"] = instance_id + existing_details["ip_allocation_id"] = ip_allocation_id + existing_details["ip_association_id"] = ip_association_id + update_details(saved_details_file, existing_details) + else: + print(f"Using existing instance {instance_id}") + + # Create load balancer + balancer_arn = existing_details.get("balancer_arn") + if balancer_arn is None: + print("Creating load balancer...") + balancer_arn = ec2_helpers.create_load_balancer( + session, + [vpc_details["subnet_1_id"], vpc_details["subnet_2_id"]], + security_group_id, + vpc_details["vpc_id"], + ) + + print("Registering to hosted zone") + ec2_helpers.register_zone_records( + session, + existing_details["hosted_zone_id"], + domain_name, + balancer_arn, + cert_details["Name"], + cert_details["Value"], + ) + + existing_details["balancer_arn"] = balancer_arn + update_details(saved_details_file, existing_details) + else: + print(f"Using existing balancer {balancer_arn}") + + # Create the target group for the fallback instance + target_group_arn = existing_details.get("target_group_arn") + if target_group_arn is None: + print("Creating target group...") + target_group_arn = ec2_helpers.create_target_group( + session, vpc_details["vpc_id"], instance_id + ) + existing_details["target_group_arn"] = target_group_arn + update_details(saved_details_file, existing_details) + else: + print(f"Using existing target group {target_group_arn}") + + # Create listener in balancer to direct to target group + listener_arn = existing_details.get("listener_arn") + if listener_arn is None: + print("Creating listener for load balancer...") + listener_arn = ec2_helpers.configure_base_balancer( + session, + balancer_arn, + cert_details["arn"], + target_group_arn, + ) + existing_details["listener_arn"] = listener_arn + update_details(saved_details_file, existing_details) + else: + print(f"Using existing listener {listener_arn}") + + # Finally, deploy the fallback server contents: + ec2_helpers.deploy_fallback_server( + session, instance_id, key_pair_name, access_logs_key + ) + existing_details["access_logs_key"] = access_logs_key + update_details(saved_details_file, existing_details) + + return existing_details + + +# TODO Hydrize +def main(): + iam_role_name = input("Please enter local profile name for IAM role\n>> ") + ec2_helpers.setup_ec2_credentials(iam_role_name) + + domain_name = input("Please provide the domain name you will be using\n>> ") + ssh_ip_block = input("Provide the CIDR IP block for ssh access\n>> ") + access_logs_key = input( + "Please provide a key password to use for accessing server logs\n>> " + ) + launch_ec2_fallback(iam_role_name, domain_name, ssh_ip_block, access_logs_key) + + +if __name__ == "__main__": + main() diff --git a/mephisto/abstractions/architects/ec2/run_scripts/flask/init_server.sh b/mephisto/abstractions/architects/ec2/run_scripts/flask/init_server.sh new file mode 100644 index 000000000..d2af6510e --- /dev/null +++ b/mephisto/abstractions/architects/ec2/run_scripts/flask/init_server.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +echo "Installing requirements" +sudo yum update -y >> /home/ec2-user/routing_server/setup/setup_log.txt 2>&1 +sudo yum install -y httpd >> /home/ec2-user/routing_server/setup/setup_log.txt 2>&1 + +sudo -H pip3 install flask gevent >> /home/ec2-user/routing_server/setup/setup_log.txt 2>&1 + +echo "Preparing service..." +sudo cp /home/ec2-user/routing_server/setup/router.service /etc/systemd/system/router.service +sudo chmod 744 /home/ec2-user/routing_server/setup/run_server.sh +sudo chmod 664 /etc/systemd/system/router.service + +echo "Launching service..." +sudo systemctl daemon-reload >> /home/ec2-user/routing_server/setup/setup_log.txt 2>&1 +sudo systemctl enable router.service >> /home/ec2-user/routing_server/setup/setup_log.txt 2>&1 +sudo systemctl start router.service >> /home/ec2-user/routing_server/setup/setup_log.txt 2>&1 diff --git a/mephisto/abstractions/architects/ec2/run_scripts/flask/router.service b/mephisto/abstractions/architects/ec2/run_scripts/flask/router.service new file mode 100644 index 000000000..f5da8541a --- /dev/null +++ b/mephisto/abstractions/architects/ec2/run_scripts/flask/router.service @@ -0,0 +1,12 @@ +[Unit] +After=network.service +Description=Routing server application + +[Service] +Type=simple +User=ec2-user +Restart=always +ExecStart=/home/ec2-user/routing_server/setup/run_server.sh + +[Install] +WantedBy=multi-user.target diff --git a/mephisto/abstractions/architects/ec2/run_scripts/flask/run_server.sh b/mephisto/abstractions/architects/ec2/run_scripts/flask/run_server.sh new file mode 100644 index 000000000..a3597876c --- /dev/null +++ b/mephisto/abstractions/architects/ec2/run_scripts/flask/run_server.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +cd /home/ec2-user/routing_server/router/ +PORT=5000 python3 app.py diff --git a/mephisto/abstractions/architects/ec2/run_scripts/node/init_server.sh b/mephisto/abstractions/architects/ec2/run_scripts/node/init_server.sh new file mode 100644 index 000000000..dcc734ed9 --- /dev/null +++ b/mephisto/abstractions/architects/ec2/run_scripts/node/init_server.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +echo "Installing basic requirements..." +sudo yum update -y >> /home/ec2-user/routing_server/setup/setup_log.txt 2>&1 +sudo yum install -y httpd >> /home/ec2-user/routing_server/setup/setup_log.txt 2>&1 + +echo "Downloading Node..." +curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.34.0/install.sh | bash >> /home/ec2-user/routing_server/setup/setup_log.txt 2>&1 +. ~/.nvm/nvm.sh >> /home/ec2-user/routing_server/setup/setup_log.txt 2>&1 +nvm install node >> /home/ec2-user/routing_server/setup/setup_log.txt 2>&1 + +echo "Installing router modules..." +cd /home/ec2-user/routing_server/router/ +npm install >> /home/ec2-user/routing_server/setup/setup_log.txt 2>&1 + +echo "Preparing service..." +sudo cp /home/ec2-user/routing_server/setup/router.service /etc/systemd/system/router.service +sudo chmod 744 /home/ec2-user/routing_server/setup/run_server.sh +sudo chmod 664 /etc/systemd/system/router.service + +echo "Launching service..." +sudo systemctl daemon-reload >> /home/ec2-user/routing_server/setup/setup_log.txt 2>&1 +sudo systemctl enable router.service >> /home/ec2-user/routing_server/setup/setup_log.txt 2>&1 +sudo systemctl start router.service >> /home/ec2-user/routing_server/setup/setup_log.txt 2>&1 diff --git a/mephisto/abstractions/architects/ec2/run_scripts/node/router.service b/mephisto/abstractions/architects/ec2/run_scripts/node/router.service new file mode 100644 index 000000000..f5da8541a --- /dev/null +++ b/mephisto/abstractions/architects/ec2/run_scripts/node/router.service @@ -0,0 +1,12 @@ +[Unit] +After=network.service +Description=Routing server application + +[Service] +Type=simple +User=ec2-user +Restart=always +ExecStart=/home/ec2-user/routing_server/setup/run_server.sh + +[Install] +WantedBy=multi-user.target diff --git a/mephisto/abstractions/architects/ec2/run_scripts/node/run_server.sh b/mephisto/abstractions/architects/ec2/run_scripts/node/run_server.sh new file mode 100644 index 000000000..e73684e73 --- /dev/null +++ b/mephisto/abstractions/architects/ec2/run_scripts/node/run_server.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +cd /home/ec2-user/routing_server/router/ +. ~/.nvm/nvm.sh +PORT=5000 node server.js diff --git a/mephisto/abstractions/architects/ec2/servers/README.md b/mephisto/abstractions/architects/ec2/servers/README.md new file mode 100644 index 000000000..2c2bdc4ff --- /dev/null +++ b/mephisto/abstractions/architects/ec2/servers/README.md @@ -0,0 +1,3 @@ +# Server Details + +This folder is used to store all of the server details that have been launched by this architect, such that there is a location to find the details and cleanup if something goes wrong. \ No newline at end of file diff --git a/mephisto/abstractions/blueprints/static_html_task/static_html_blueprint.py b/mephisto/abstractions/blueprints/static_html_task/static_html_blueprint.py index 6b41d982b..4472eea0d 100644 --- a/mephisto/abstractions/blueprints/static_html_task/static_html_blueprint.py +++ b/mephisto/abstractions/blueprints/static_html_task/static_html_blueprint.py @@ -7,6 +7,7 @@ from mephisto.abstractions.blueprints.abstract.static_task.static_blueprint import ( StaticBlueprint, StaticBlueprintArgs, + SharedStaticTaskState, ) from dataclasses import dataclass, field from omegaconf import MISSING, DictConfig diff --git a/mephisto/operations/registry.py b/mephisto/operations/registry.py index d8ea8e515..b82128d7a 100644 --- a/mephisto/operations/registry.py +++ b/mephisto/operations/registry.py @@ -102,6 +102,8 @@ def fill_registries(): importlib.import_module( f"mephisto.abstractions.architects.{architect_name}" ) + # After imports are recursive, manage this more cleanly + importlib.import_module("mephisto.abstractions.architects.ec2.ec2_architect") # Import Mephisto Blueprints blueprint_root = os.path.join( diff --git a/mephisto/tools/scripts.py b/mephisto/tools/scripts.py index 19bb56867..e6e275301 100644 --- a/mephisto/tools/scripts.py +++ b/mephisto/tools/scripts.py @@ -117,7 +117,10 @@ def augment_config_from_db(script_cfg: DictConfig, db: "MephistoDB") -> DictConf input( f"This task is going to launch live on {provider_type}, press enter to continue: " ) - if provider_type in ["mturk_sandbox", "mturk"] and architect_type != "heroku": + if provider_type in ["mturk_sandbox", "mturk"] and architect_type not in [ + "heroku", + "ec2", + ]: input( f"This task is going to launch live on {provider_type}, but your " f"provided architect is {architect_type}, are you sure you "