From 3b11c137dbbfdce7795dbf8c33bde56d4529d70f Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Mon, 9 Dec 2024 12:19:02 -0800 Subject: [PATCH 01/28] Removing start, stop with ec2.py, adding validations --- scripts/aws/config-server/requirements.txt | 4 + scripts/aws/ec2.py | 219 ++++++++++++++++++ scripts/aws/start.sh | 124 ---------- scripts/aws/stop.sh | 31 --- .../uid2-operator-ami/ansible/playbook.yml | 17 +- scripts/confidential_compute.py | 92 ++++++++ 6 files changed, 321 insertions(+), 166 deletions(-) create mode 100644 scripts/aws/ec2.py delete mode 100644 scripts/aws/start.sh delete mode 100644 scripts/aws/stop.sh create mode 100644 scripts/confidential_compute.py diff --git a/scripts/aws/config-server/requirements.txt b/scripts/aws/config-server/requirements.txt index 57652a258..957ba1d3e 100644 --- a/scripts/aws/config-server/requirements.txt +++ b/scripts/aws/config-server/requirements.txt @@ -1,3 +1,7 @@ Flask==2.3.2 Werkzeug==3.0.3 setuptools==70.0.0 +requests==2.32.3 +boto3==1.35.59 +urllib3==2.2.3 +PyYAML===5.4.1 \ No newline at end of file diff --git a/scripts/aws/ec2.py b/scripts/aws/ec2.py new file mode 100644 index 000000000..56adf5d26 --- /dev/null +++ b/scripts/aws/ec2.py @@ -0,0 +1,219 @@ +#!/usr/bin/env python3 + +import boto3 +import json +import os +import subprocess +import re +import multiprocessing +import requests +import signal +import argparse +from botocore.exceptions import ClientError +from typing import Dict +import sys +import time +import yaml + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from confidential_compute import ConfidentialCompute, ConfidentialComputeConfig, ConfidentialComputeMissingConfigError + +class EC2(ConfidentialCompute): + + def __init__(self): + super().__init__() + + def __get_aws_token(self) -> str: + """Fetches a temporary AWS EC2 metadata token.""" + try: + token_url = "http://169.254.169.254/latest/api/token" + response = requests.put( + token_url, 
headers={"X-aws-ec2-metadata-token-ttl-seconds": "3600"}, timeout=2 + ) + return response.text + except requests.RequestException as e: + raise RuntimeError(f"Failed to fetch aws token: {e}") + + def __get_current_region(self) -> str: + """Fetches the current AWS region from EC2 instance metadata.""" + token = self.__get_aws_token() + metadata_url = "http://169.254.169.254/latest/dynamic/instance-identity/document" + headers = {"X-aws-ec2-metadata-token": token} + try: + response = requests.get(metadata_url, headers=headers, timeout=2) + response.raise_for_status() + return response.json()["region"] + except requests.RequestException as e: + raise RuntimeError(f"Failed to fetch region: {e}") + + def __validate_configs(self, secret): + required_keys = ["operator_key", "environment", "core_base_url", "optout_base_url"] + missing_keys = [key for key in required_keys if key not in secret] + if missing_keys: + raise ConfidentialComputeMissingConfigError(missing_keys) + + + def _get_secret(self, secret_identifier: str) -> ConfidentialComputeConfig: + secret_identifier = "uid2-config-stack-tjm-unvalidate-eif-test1" + """Fetches a secret value from AWS Secrets Manager.""" + region = self.__get_current_region() + client = boto3.client("secretsmanager", region_name=region) + try: + secret = json.loads(client.get_secret_value(SecretId=secret_identifier)["SecretString"]) + self.__validate_configs(secret) + return self.__add_defaults(secret) + except ClientError as e: + raise RuntimeError(f"Unable to access Secrets Manager {secret_identifier}: {e}") + + @staticmethod + def __add_defaults(configs: Dict[str, any]) -> ConfidentialComputeConfig: + """Adds default values to configuration if missing.""" + try: + with open("/etc/nitro_enclaves/allocator.yaml", "r") as file: + nitro_config = yaml.safe_load(file) + configs.setdefault("enclave_memory_mb", nitro_config['memory_mib']) + configs.setdefault("enclave_cpu_count", nitro_config['cpu_count']) + except Exception as e: + raise 
RuntimeError("/etc/nitro_enclaves/allocator.yaml does not exist/ does not have cpu, memory allocated") + configs.setdefault("debug_mode", False) + #urls are currently not set anywhere and is overridden based on identity scope available only inside docker. Change to passing those through config values. + #And these should be validated + configs.setdefault("core_base_url", "https://core.uidapi.com" if configs["environment"] == "prod" else "https://core-integ.uidapi.com") + configs.setdefault("optout_base_url", "https://optout.uidapi.com" if configs["environment"] == "prod" else "https://optout-integ.uidapi.com") + return configs + + def __setup_vsockproxy(self, log_level: int) -> None: + """ + Sets up the vsock proxy service. + TODO: Evaluate adding vsock logging based on log_level here + """ + thread_count = (multiprocessing.cpu_count() + 1) // 2 + command = [ + "/usr/bin/vsockpx", "-c", "/etc/uid2operator/proxy.yaml", + "--workers", str(thread_count), "--log-level", str(log_level), "--daemon" + ] + subprocess.run(command) + + def __run_config_server(self,log_level = None) -> None: + """ + Starts the Flask configuration server. + TODO: Based on log level add logging to flask + """ + os.makedirs("/etc/secret/secret-value", exist_ok=True) + config_path = "/etc/secret/secret-value/config" + with open(config_path, 'w') as config_file: + json.dump(self.configs, config_file) + os.chdir("/opt/uid2operator/config-server") + command = ["./bin/flask", "run", "--host", "127.0.0.1", "--port", "27015"] + try: + subprocess.Popen(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + except Exception as e: + print(f"Failed to start the Flask config server.\n '{' '.join(command)}': {e}") + raise RuntimeError ("Failed to start required flask server") + + def __run_socks_proxy(self, log_level = None) -> None: + """ + Starts the SOCKS proxy service. 
+ TODO: Based on log level add logging to sockd + """ + command = ["sockd", "-D"] + subprocess.run(command) + + def __get_secret_name_from_userdata(self) -> str: + """Extracts the secret name from EC2 user data.""" + token = self.__get_aws_token() + user_data_url = "http://169.254.169.254/latest/user-data" + response = requests.get(user_data_url, headers={"X-aws-ec2-metadata-token": token}) + user_data = response.text + + with open("/opt/uid2operator/identity_scope.txt") as file: + identity_scope = file.read().strip() + + default_name = f"{identity_scope.lower()}-operator-config-key" + hardcoded_value = f"{identity_scope.upper()}_CONFIG_SECRET_KEY" + match = re.search(rf'^export {hardcoded_value}="(.+?)"$', user_data, re.MULTILINE) + return match.group(1) if match else default_name + + def _setup_auxiliaries(self) -> None: + """Sets up the necessary auxiliary services and configuration.""" + self.configs = self._get_secret(self.__get_secret_name_from_userdata()) + log_level = 3 if self.configs["debug_mode"] else 1 + self.__setup_vsockproxy(log_level) + self.__run_config_server(log_level) + self.__run_socks_proxy(log_level) + time.sleep(5) #TODO: Change to while loop if required. 
+ + def _validate_auxiliaries(self) -> None: + """Validates auxiliary services.""" + self.validate_operator_key() + proxy = "socks5://127.0.0.1:3306" + config_url = "http://127.0.0.1:27015/getConfig" + try: + response = requests.get(config_url) + response.raise_for_status() + except requests.RequestException as e: + raise RuntimeError(f"Config server unreachable: {e}") + proxies = {"http": proxy, "https": proxy} + try: + response = requests.get(config_url, proxies=proxies) + response.raise_for_status() + except requests.RequestException as e: + raise RuntimeError(f"Cannot connect to config server via SOCKS proxy: {e}") + + def run_compute(self) -> None: + """Main execution flow for confidential compute.""" + self._setup_auxiliaries() + self._validate_auxiliaries() + self.validate_connectivity() + command = [ + "nitro-cli", "run-enclave", + "--eif-path", "/opt/uid2operator/uid2operator.eif", + "--memory", str(self.configs["enclave_memory_mb"]), + "--cpu-count", str(self.configs["enclave_cpu_count"]), + "--enclave-cid", "42", + "--enclave-name", "uid2operator" + ] + if self.configs["debug_mode"]: + command += ["--debug-mode", "--attach-console"] + subprocess.run(command, check=True) + + def cleanup(self) -> None: + """Terminates the Nitro Enclave and auxiliary processes.""" + try: + describe_output = subprocess.check_output(["nitro-cli", "describe-enclaves"], text=True) + enclaves = json.loads(describe_output) + enclave_id = enclaves[0].get("EnclaveID") if enclaves else None + if enclave_id: + subprocess.run(["nitro-cli", "terminate-enclave", "--enclave-id", enclave_id]) + print(f"Terminated enclave with ID: {enclave_id}") + else: + print("No active enclaves found.") + self.__kill_auxiliaries() + except subprocess.SubprocessError as e: + raise (f"Error during cleanup: {e}") + + def __kill_auxiliaries(self) -> None: + """Kills a process by its name.""" + try: + for process_name in ["vsockpx", "sockd", "flask"]: + result = subprocess.run(["pgrep", "-f", process_name], 
stdout=subprocess.PIPE, text=True, check=False) + if result.stdout.strip(): + for pid in result.stdout.strip().split("\n"): + os.kill(int(pid), signal.SIGKILL) + print(f"Killed process '{process_name}'.") + else: + print(f"No process named '{process_name}' found.") + except Exception as e: + print(f"Error killing process '{process_name}': {e}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Manage EC2-based confidential compute workflows.") + parser.add_argument("-o", "--operation", choices=["stop", "start"], default="start", help="Operation to perform.") + args = parser.parse_args() + ec2 = EC2() + if args.operation == "stop": + ec2.cleanup() + else: + ec2.run_compute() + \ No newline at end of file diff --git a/scripts/aws/start.sh b/scripts/aws/start.sh deleted file mode 100644 index 429826928..000000000 --- a/scripts/aws/start.sh +++ /dev/null @@ -1,124 +0,0 @@ -#!/bin/bash - -echo "$HOSTNAME" > /etc/uid2operator/HOSTNAME -EIF_PATH=${EIF_PATH:-/opt/uid2operator/uid2operator.eif} -IDENTITY_SCOPE=${IDENTITY_SCOPE:-$(cat /opt/uid2operator/identity_scope.txt)} -CID=${CID:-42} -TOKEN=$(curl --request PUT "http://169.254.169.254/latest/api/token" --header "X-aws-ec2-metadata-token-ttl-seconds: 3600") -USER_DATA=$(curl -s http://169.254.169.254/latest/user-data --header "X-aws-ec2-metadata-token: $TOKEN") -AWS_REGION_NAME=$(curl -s http://169.254.169.254/latest/dynamic/instance-identity/document/ --header "X-aws-ec2-metadata-token: $TOKEN" | jq -r '.region') -if [ "$IDENTITY_SCOPE" = 'UID2' ]; then - UID2_CONFIG_SECRET_KEY=$([[ "$(echo "${USER_DATA}" | grep UID2_CONFIG_SECRET_KEY=)" =~ ^export\ UID2_CONFIG_SECRET_KEY=\"(.*)\"$ ]] && echo "${BASH_REMATCH[1]}" || echo "uid2-operator-config-key") -elif [ "$IDENTITY_SCOPE" = 'EUID' ]; then - UID2_CONFIG_SECRET_KEY=$([[ "$(echo "${USER_DATA}" | grep EUID_CONFIG_SECRET_KEY=)" =~ ^export\ EUID_CONFIG_SECRET_KEY=\"(.*)\"$ ]] && echo "${BASH_REMATCH[1]}" || echo "euid-operator-config-key") 
-else - echo "Unrecognized IDENTITY_SCOPE $IDENTITY_SCOPE" - exit 1 -fi -CORE_BASE_URL=$([[ "$(echo "${USER_DATA}" | grep CORE_BASE_URL=)" =~ ^export\ CORE_BASE_URL=\"(.*)\"$ ]] && echo "${BASH_REMATCH[1]}" || echo "") -OPTOUT_BASE_URL=$([[ "$(echo "${USER_DATA}" | grep OPTOUT_BASE_URL=)" =~ ^export\ OPTOUT_BASE_URL=\"(.*)\"$ ]] && echo "${BASH_REMATCH[1]}" || echo "") - -echo "UID2_CONFIG_SECRET_KEY=${UID2_CONFIG_SECRET_KEY}" -echo "CORE_BASE_URL=${CORE_BASE_URL}" -echo "OPTOUT_BASE_URL=${OPTOUT_BASE_URL}" -echo "AWS_REGION_NAME=${AWS_REGION_NAME}" - -function terminate_old_enclave() { - ENCLAVE_ID=$(nitro-cli describe-enclaves | jq -r ".[0].EnclaveID") - [ "$ENCLAVE_ID" != "null" ] && nitro-cli terminate-enclave --enclave-id ${ENCLAVE_ID} -} - -function config_aws() { - aws configure set default.region $AWS_REGION_NAME -} - -function default_cpu() { - target=$(( $(nproc) * 3 / 4 )) - if [ $target -lt 2 ]; then - target="2" - fi - echo $target -} - -function default_mem() { - target=$(( $(grep MemTotal /proc/meminfo | awk '{print $2}') * 3 / 4000 )) - if [ $target -lt 24576 ]; then - target="24576" - fi - echo $target -} - -function read_allocation() { - USER_CUSTOMIZED=$(aws secretsmanager get-secret-value --secret-id "$UID2_CONFIG_SECRET_KEY" | jq -r '.SecretString' | jq -r '.customize_enclave') - shopt -s nocasematch - if [ "$USER_CUSTOMIZED" = "true" ]; then - echo "Applying user customized CPU/Mem allocation..." - CPU_COUNT=${CPU_COUNT:-$(aws secretsmanager get-secret-value --secret-id "$UID2_CONFIG_SECRET_KEY" | jq -r '.SecretString' | jq -r '.enclave_cpu_count')} - MEMORY_MB=${MEMORY_MB:-$(aws secretsmanager get-secret-value --secret-id "$UID2_CONFIG_SECRET_KEY" | jq -r '.SecretString' | jq -r '.enclave_memory_mb')} - else - echo "Applying default CPU/Mem allocation..." 
- CPU_COUNT=6 - MEMORY_MB=24576 - fi - shopt -u nocasematch -} - - -function update_allocation() { - ALLOCATOR_YAML=/etc/nitro_enclaves/allocator.yaml - if [ -z "$CPU_COUNT" ] || [ -z "$MEMORY_MB" ]; then - echo 'No CPU_COUNT or MEMORY_MB set, cannot start enclave' - exit 1 - fi - echo "updating allocator: CPU_COUNT=$CPU_COUNT, MEMORY_MB=$MEMORY_MB..." - systemctl stop nitro-enclaves-allocator.service - sed -r "s/^(\s*memory_mib\s*:\s*).*/\1$MEMORY_MB/" -i $ALLOCATOR_YAML - sed -r "s/^(\s*cpu_count\s*:\s*).*/\1$CPU_COUNT/" -i $ALLOCATOR_YAML - systemctl start nitro-enclaves-allocator.service && systemctl enable nitro-enclaves-allocator.service - echo "nitro-enclaves-allocator restarted" -} - -function setup_vsockproxy() { - VSOCK_PROXY=${VSOCK_PROXY:-/usr/bin/vsockpx} - VSOCK_CONFIG=${VSOCK_CONFIG:-/etc/uid2operator/proxy.yaml} - VSOCK_THREADS=${VSOCK_THREADS:-$(( ( $(nproc) + 1 ) / 2 )) } - VSOCK_LOG_LEVEL=${VSOCK_LOG_LEVEL:-3} - echo "starting vsock proxy at $VSOCK_PROXY with $VSOCK_THREADS worker threads..." - $VSOCK_PROXY -c $VSOCK_CONFIG --workers $VSOCK_THREADS --log-level $VSOCK_LOG_LEVEL --daemon - echo "vsock proxy now running in background." -} - -function setup_dante() { - sockd -D -} - -function run_config_server() { - mkdir -p /etc/secret/secret-value - { - set +x; # Disable tracing within this block - 2>/dev/null; - SECRET_JSON=$(aws secretsmanager get-secret-value --secret-id "$UID2_CONFIG_SECRET_KEY" | jq -r '.SecretString') - echo "${SECRET_JSON}" > /etc/secret/secret-value/config; - } - echo $(jq ".core_base_url = \"$CORE_BASE_URL\"" /etc/secret/secret-value/config) > /etc/secret/secret-value/config - echo $(jq ".optout_base_url = \"$OPTOUT_BASE_URL\"" /etc/secret/secret-value/config) > /etc/secret/secret-value/config - echo "run_config_server" - cd /opt/uid2operator/config-server - ./bin/flask run --host 127.0.0.1 --port 27015 & -} - -function run_enclave() { - echo "starting enclave..." 
- nitro-cli run-enclave --eif-path $EIF_PATH --memory $MEMORY_MB --cpu-count $CPU_COUNT --enclave-cid $CID --enclave-name uid2operator -} - -terminate_old_enclave -config_aws -read_allocation -# update_allocation -setup_vsockproxy -setup_dante -run_config_server -run_enclave - -echo "Done!" diff --git a/scripts/aws/stop.sh b/scripts/aws/stop.sh deleted file mode 100644 index c37bdc729..000000000 --- a/scripts/aws/stop.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -function terminate_old_enclave() { - echo "Terminating Enclave..." - ENCLAVE_ID=$(nitro-cli describe-enclaves | jq -r ".[0].EnclaveID") - if [ "$ENCLAVE_ID" != "null" ]; then - nitro-cli terminate-enclave --enclave-id $ENCLAVE_ID - else - echo "no running enclaves to terminate" - fi -} - -function kill_process() { - echo "Shutting down $1..." - pid=$(pidof $1) - if [ -z "$pid" ]; then - echo "process $1 not found" - else - kill -9 $pid - echo "$1 exited" - fi -} - -terminate_old_enclave -kill_process vsockpx -kill_process sockd -# we start aws vsock-proxy via nohup -kill_process vsock-proxy -kill_process nohup - -echo "Done!" 
diff --git a/scripts/aws/uid2-operator-ami/ansible/playbook.yml b/scripts/aws/uid2-operator-ami/ansible/playbook.yml index 84c6c6f14..8fb27c2d5 100644 --- a/scripts/aws/uid2-operator-ami/ansible/playbook.yml +++ b/scripts/aws/uid2-operator-ami/ansible/playbook.yml @@ -72,26 +72,21 @@ - name: Install starter script ansible.builtin.copy: - src: /tmp/artifacts/start.sh - dest: /opt/uid2operator/start.sh + src: /tmp/artifacts/ec2.py + dest: /opt/uid2operator/ec2.py remote_src: yes - name: Make starter script executable ansible.builtin.file: - path: /opt/uid2operator/start.sh + path: /opt/uid2operator/ec2.py mode: '0755' - - name: Install stopper script + - name: Copy confidential_compute script ansible.builtin.copy: - src: /tmp/artifacts/stop.sh - dest: /opt/uid2operator/stop.sh + src: /tmp/artifacts/confidential_compute.py + dest: /opt/uid2operator/confidential_compute.py remote_src: yes - - name: Make starter script executable - ansible.builtin.file: - path: /opt/uid2operator/stop.sh - mode: '0755' - - name: Install Operator EIF ansible.builtin.copy: src: /tmp/artifacts/uid2operator.eif diff --git a/scripts/confidential_compute.py b/scripts/confidential_compute.py new file mode 100644 index 000000000..56018792c --- /dev/null +++ b/scripts/confidential_compute.py @@ -0,0 +1,92 @@ +import requests +import re +import socket +from urllib.parse import urlparse +from abc import ABC, abstractmethod +from typing import TypedDict + + +class ConfidentialComputeConfig(TypedDict): + enclave_memory_mb: int + enclave_cpu_count: int + debug_mode: bool + operator_key: str + core_base_url: str + optout_base_url: str + environment: str + +class ConfidentialCompute(ABC): + + def __init__(self): + self.configs: ConfidentialComputeConfig = {} + + @abstractmethod + def _get_secret(self, secret_identifier: str) -> ConfidentialComputeConfig: + """ + Fetches the secret from a secret store. + + Raises: + SecretNotFoundException: If the secret is not found. 
+ """ + pass + + def validate_operator_key(self) -> bool: + """ Validates the operator key format and its environment alignment.""" + operator_key = self.configs.get("operator_key") + if not operator_key: + raise ValueError("API token is missing from the configuration.") + pattern = r"^(UID2|EUID)-.\-(I|P)-\d+-\*$" + if re.match(pattern, operator_key): + env = self.configs.get("environment", "").lower() + debug_mode = self.configs.get("debug_mode", False) + expected_env = "I" if debug_mode or env == "integ" else "P" + if operator_key.split("-")[2] != expected_env: + raise ValueError( + f"Operator key does not match the expected environment ({expected_env})." + ) + return True + + @staticmethod + def __resolve_hostname(url: str) -> str: + """ Resolves the hostname of a URL to an IP address.""" + hostname = urlparse(url).netloc + return socket.gethostbyname(hostname) + + def validate_connectivity(self) -> None: + """ Validates that the core and opt-out URLs are accessible.""" + try: + core_url = self.configs["core_base_url"] + optout_url = self.configs["optout_base_url"] + core_ip = self.__resolve_hostname(core_url) + requests.get(core_url, timeout=5) + optout_ip = self.__resolve_hostname(optout_url) + requests.get(optout_url, timeout=5) + except (requests.ConnectionError, requests.Timeout) as e: + raise Exception( + f"Failed to reach required URLs. Consider enabling {core_ip}, {optout_ip} in the egress firewall." + ) + except Exception as e: + raise Exception("Failed to reach the URLs.") from e + + @abstractmethod + def _setup_auxiliaries(self) -> None: + """ Sets up auxiliary processes required for confidential computing. 
""" + pass + + @abstractmethod + def _validate_auxiliaries(self) -> None: + """ Validates auxiliary services are running.""" + pass + + @abstractmethod + def run_compute(self) -> None: + """ Runs confidential computing.""" + pass + +class ConfidentialComputeMissingConfigError(Exception): + """Custom exception to handle missing config keys.""" + def __init__(self, missing_keys): + self.missing_keys = missing_keys + self.message = f"Missing configuration keys: {', '.join(missing_keys)}" + super().__init__(self.message) + From 6034c5eea36e089713eccafd2fcc7b856cee18c5 Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Mon, 9 Dec 2024 12:30:36 -0800 Subject: [PATCH 02/28] Removing start, stop with ec2.py, adding validations --- scripts/aws/ec2.py | 4 ++-- scripts/confidential_compute.py | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/aws/ec2.py b/scripts/aws/ec2.py index 56adf5d26..3aad1cc6a 100644 --- a/scripts/aws/ec2.py +++ b/scripts/aws/ec2.py @@ -16,7 +16,7 @@ import yaml sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from confidential_compute import ConfidentialCompute, ConfidentialComputeConfig, ConfidentialComputeMissingConfigError +from confidential_compute import ConfidentialCompute, ConfidentialComputeConfig, ConfidentialComputeMissingConfigError, SecretNotFoundException class EC2(ConfidentialCompute): @@ -63,7 +63,7 @@ def _get_secret(self, secret_identifier: str) -> ConfidentialComputeConfig: self.__validate_configs(secret) return self.__add_defaults(secret) except ClientError as e: - raise RuntimeError(f"Unable to access Secrets Manager {secret_identifier}: {e}") + raise SecretNotFoundException(f"{secret_identifier} in {region}") @staticmethod def __add_defaults(configs: Dict[str, any]) -> ConfidentialComputeConfig: diff --git a/scripts/confidential_compute.py b/scripts/confidential_compute.py index 56018792c..b9ec723ea 100644 --- a/scripts/confidential_compute.py +++ 
b/scripts/confidential_compute.py @@ -90,3 +90,8 @@ def __init__(self, missing_keys): self.message = f"Missing configuration keys: {', '.join(missing_keys)}" super().__init__(self.message) +class SecretNotFoundException(Exception): + """Custom exception if secret manager is not found""" + def __init__(self, name): + self.message = f"Secret manager not found - {name}" + super().__init__(self.message) From d1f17560812fcb4b1f7e07ee193ec4368d96c9da Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Mon, 9 Dec 2024 17:18:20 -0800 Subject: [PATCH 03/28] Updates --- scripts/aws/config-server/requirements.txt | 2 +- scripts/aws/ec2.py | 58 +++++++++++++--------- scripts/confidential_compute.py | 19 ++++++- 3 files changed, 53 insertions(+), 26 deletions(-) diff --git a/scripts/aws/config-server/requirements.txt b/scripts/aws/config-server/requirements.txt index 957ba1d3e..c140fc7f6 100644 --- a/scripts/aws/config-server/requirements.txt +++ b/scripts/aws/config-server/requirements.txt @@ -1,7 +1,7 @@ Flask==2.3.2 Werkzeug==3.0.3 setuptools==70.0.0 -requests==2.32.3 +requests[socks]==2.32.3 boto3==1.35.59 urllib3==2.2.3 PyYAML===5.4.1 \ No newline at end of file diff --git a/scripts/aws/ec2.py b/scripts/aws/ec2.py index 3aad1cc6a..1f3d7ec7e 100644 --- a/scripts/aws/ec2.py +++ b/scripts/aws/ec2.py @@ -23,6 +23,18 @@ class EC2(ConfidentialCompute): def __init__(self): super().__init__() + @staticmethod + def run_command(command, seperate_process=False): + print(f"Running command: {' '.join(command)}") + try: + if seperate_process: + subprocess.Popen(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + else: + subprocess.run(command,check=True) + except Exception as e: + print(f"Failed to run command: {str(e)}") + raise RuntimeError (f"Failed to start {' '.join(command)} ") + def __get_aws_token(self) -> str: """Fetches a temporary AWS EC2 metadata token.""" try: @@ -51,7 +63,11 @@ def __validate_configs(self, secret): missing_keys = [key for key in required_keys 
if key not in secret] if missing_keys: raise ConfidentialComputeMissingConfigError(missing_keys) - + if "enclave_memory_mb" in secret or "enclave_cpu_count" in secret: + max_capacity = self.__get_max_capacity() + for key in ["enclave_memory_mb", "enclave_cpu_count"]: + if int(secret.get(key, 0)) >= max_capacity.get(key): + raise ValueError(f"{key} value ({secret.get(key, 0)}) exceeds the maximum allowed ({max_capacity.get(key)}).") def _get_secret(self, secret_identifier: str) -> ConfidentialComputeConfig: secret_identifier = "uid2-config-stack-tjm-unvalidate-eif-test1" @@ -64,22 +80,22 @@ def _get_secret(self, secret_identifier: str) -> ConfidentialComputeConfig: return self.__add_defaults(secret) except ClientError as e: raise SecretNotFoundException(f"{secret_identifier} in {region}") - + @staticmethod - def __add_defaults(configs: Dict[str, any]) -> ConfidentialComputeConfig: - """Adds default values to configuration if missing.""" + def __get_max_capacity(): try: with open("/etc/nitro_enclaves/allocator.yaml", "r") as file: nitro_config = yaml.safe_load(file) - configs.setdefault("enclave_memory_mb", nitro_config['memory_mib']) - configs.setdefault("enclave_cpu_count", nitro_config['cpu_count']) + return {"enclave_memory_mb": nitro_config['memory_mib'], "enclave_cpu_count": nitro_config['cpu_count']} except Exception as e: - raise RuntimeError("/etc/nitro_enclaves/allocator.yaml does not exist/ does not have cpu, memory allocated") + raise RuntimeError("/etc/nitro_enclaves/allocator.yaml does not have CPU, memory allocated") + + def __add_defaults(self, configs: Dict[str, any]) -> ConfidentialComputeConfig: + """Adds default values to configuration if missing.""" + default_capacity = self.__get_max_capacity() + configs.setdefault("enclave_memory_mb", default_capacity["enclave_memory_mb"]) + configs.setdefault("enclave_cpu_count", default_capacity["enclave_cpu_count"]) configs.setdefault("debug_mode", False) - #urls are currently not set anywhere and is 
overridden based on identity scope available only inside docker. Change to passing those through config values. - #And these should be validated - configs.setdefault("core_base_url", "https://core.uidapi.com" if configs["environment"] == "prod" else "https://core-integ.uidapi.com") - configs.setdefault("optout_base_url", "https://optout.uidapi.com" if configs["environment"] == "prod" else "https://optout-integ.uidapi.com") return configs def __setup_vsockproxy(self, log_level: int) -> None: @@ -92,12 +108,11 @@ def __setup_vsockproxy(self, log_level: int) -> None: "/usr/bin/vsockpx", "-c", "/etc/uid2operator/proxy.yaml", "--workers", str(thread_count), "--log-level", str(log_level), "--daemon" ] - subprocess.run(command) + self.run_command(command) - def __run_config_server(self,log_level = None) -> None: + def __run_config_server(self) -> None: """ Starts the Flask configuration server. - TODO: Based on log level add logging to flask """ os.makedirs("/etc/secret/secret-value", exist_ok=True) config_path = "/etc/secret/secret-value/config" @@ -105,19 +120,14 @@ def __run_config_server(self,log_level = None) -> None: json.dump(self.configs, config_file) os.chdir("/opt/uid2operator/config-server") command = ["./bin/flask", "run", "--host", "127.0.0.1", "--port", "27015"] - try: - subprocess.Popen(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - except Exception as e: - print(f"Failed to start the Flask config server.\n '{' '.join(command)}': {e}") - raise RuntimeError ("Failed to start required flask server") + self.run_command(command, seperate_process=True) - def __run_socks_proxy(self, log_level = None) -> None: + def __run_socks_proxy(self) -> None: """ Starts the SOCKS proxy service. 
- TODO: Based on log level add logging to sockd """ command = ["sockd", "-D"] - subprocess.run(command) + self.run_command(command) def __get_secret_name_from_userdata(self) -> str: """Extracts the secret name from EC2 user data.""" @@ -175,7 +185,7 @@ def run_compute(self) -> None: ] if self.configs["debug_mode"]: command += ["--debug-mode", "--attach-console"] - subprocess.run(command, check=True) + self.run_command(command) def cleanup(self) -> None: """Terminates the Nitro Enclave and auxiliary processes.""" @@ -184,7 +194,7 @@ def cleanup(self) -> None: enclaves = json.loads(describe_output) enclave_id = enclaves[0].get("EnclaveID") if enclaves else None if enclave_id: - subprocess.run(["nitro-cli", "terminate-enclave", "--enclave-id", enclave_id]) + self.run_command(["nitro-cli", "terminate-enclave", "--enclave-id", enclave_id]) print(f"Terminated enclave with ID: {enclave_id}") else: print("No active enclaves found.") diff --git a/scripts/confidential_compute.py b/scripts/confidential_compute.py index b9ec723ea..cb7b7334e 100644 --- a/scripts/confidential_compute.py +++ b/scripts/confidential_compute.py @@ -30,7 +30,24 @@ def _get_secret(self, secret_identifier: str) -> ConfidentialComputeConfig: """ pass - def validate_operator_key(self) -> bool: + def validate_environment(self): + def validate_url(url_key, environment): + if environment not in self.configs[url_key]: + raise ValueError( + f"{url_key} must match the environment. Ensure the URL includes '{environment}'." 
+ ) + + environment = self.configs["environment"] + + if self.configs.get("debug_mode") and environment == "prod": + raise ValueError("Debug mode cannot be enabled in the production environment.") + + if environment != "prod": + validate_url("core_base_url", environment) + validate_url("optout_base_url", environment) + + + def validate_operator_key(self): """ Validates the operator key format and its environment alignment.""" operator_key = self.configs.get("operator_key") if not operator_key: From f42f872efe36764d34eaabd8f3e84efce1bf1379 Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Mon, 9 Dec 2024 17:19:40 -0800 Subject: [PATCH 04/28] Updates --- scripts/aws/ec2.py | 12 ------------ scripts/confidential_compute.py | 15 ++++++++++++++- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/scripts/aws/ec2.py b/scripts/aws/ec2.py index 1f3d7ec7e..db7d5017b 100644 --- a/scripts/aws/ec2.py +++ b/scripts/aws/ec2.py @@ -23,18 +23,6 @@ class EC2(ConfidentialCompute): def __init__(self): super().__init__() - @staticmethod - def run_command(command, seperate_process=False): - print(f"Running command: {' '.join(command)}") - try: - if seperate_process: - subprocess.Popen(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - else: - subprocess.run(command,check=True) - except Exception as e: - print(f"Failed to run command: {str(e)}") - raise RuntimeError (f"Failed to start {' '.join(command)} ") - def __get_aws_token(self) -> str: """Fetches a temporary AWS EC2 metadata token.""" try: diff --git a/scripts/confidential_compute.py b/scripts/confidential_compute.py index cb7b7334e..a154e37cb 100644 --- a/scripts/confidential_compute.py +++ b/scripts/confidential_compute.py @@ -4,7 +4,7 @@ from urllib.parse import urlparse from abc import ABC, abstractmethod from typing import TypedDict - +import subprocess class ConfidentialComputeConfig(TypedDict): enclave_memory_mb: int @@ -100,6 +100,19 @@ def run_compute(self) -> None: """ Runs confidential 
computing.""" pass + + @staticmethod + def run_command(command, seperate_process=False): + print(f"Running command: {' '.join(command)}") + try: + if seperate_process: + subprocess.Popen(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + else: + subprocess.run(command,check=True) + except Exception as e: + print(f"Failed to run command: {str(e)}") + raise RuntimeError (f"Failed to start {' '.join(command)} ") + class ConfidentialComputeMissingConfigError(Exception): """Custom exception to handle missing config keys.""" def __init__(self, missing_keys): From 2542232ca4c87842195c05d6b46d6cbe1a563bf6 Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Tue, 10 Dec 2024 11:42:14 -0800 Subject: [PATCH 05/28] Add virtual env and start it in systemd --- scripts/aws/ec2.py | 9 +++-- scripts/aws/requirements.txt | 4 +++ .../uid2-operator-ami/ansible/playbook.yml | 16 +++++++++ scripts/aws/uid2operator.service | 6 ++-- scripts/confidential_compute.py | 36 +++++++++---------- 5 files changed, 45 insertions(+), 26 deletions(-) create mode 100644 scripts/aws/requirements.txt diff --git a/scripts/aws/ec2.py b/scripts/aws/ec2.py index db7d5017b..e88f424a2 100644 --- a/scripts/aws/ec2.py +++ b/scripts/aws/ec2.py @@ -47,14 +47,14 @@ def __get_current_region(self) -> str: raise RuntimeError(f"Failed to fetch region: {e}") def __validate_configs(self, secret): - required_keys = ["operator_key", "environment", "core_base_url", "optout_base_url"] + required_keys = ["api_token", "environment", "core_base_url", "optout_base_url"] missing_keys = [key for key in required_keys if key not in secret] if missing_keys: raise ConfidentialComputeMissingConfigError(missing_keys) if "enclave_memory_mb" in secret or "enclave_cpu_count" in secret: max_capacity = self.__get_max_capacity() for key in ["enclave_memory_mb", "enclave_cpu_count"]: - if int(secret.get(key, 0)) >= max_capacity.get(key): + if int(secret.get(key, 0)) > max_capacity.get(key): raise ValueError(f"{key} value 
({secret.get(key, 0)}) exceeds the maximum allowed ({max_capacity.get(key)}).") def _get_secret(self, secret_identifier: str) -> ConfidentialComputeConfig: @@ -89,7 +89,6 @@ def __add_defaults(self, configs: Dict[str, any]) -> ConfidentialComputeConfig: def __setup_vsockproxy(self, log_level: int) -> None: """ Sets up the vsock proxy service. - TODO: Evaluate adding vsock logging based on log_level here """ thread_count = (multiprocessing.cpu_count() + 1) // 2 command = [ @@ -137,8 +136,8 @@ def _setup_auxiliaries(self) -> None: self.configs = self._get_secret(self.__get_secret_name_from_userdata()) log_level = 3 if self.configs["debug_mode"] else 1 self.__setup_vsockproxy(log_level) - self.__run_config_server(log_level) - self.__run_socks_proxy(log_level) + self.__run_config_server() + self.__run_socks_proxy() time.sleep(5) #TODO: Change to while loop if required. def _validate_auxiliaries(self) -> None: diff --git a/scripts/aws/requirements.txt b/scripts/aws/requirements.txt new file mode 100644 index 000000000..fa562cb02 --- /dev/null +++ b/scripts/aws/requirements.txt @@ -0,0 +1,4 @@ +requests[socks]==2.32.3 +boto3==1.35.59 +urllib3==2.2.3 +PyYAML===5.4.1 \ No newline at end of file diff --git a/scripts/aws/uid2-operator-ami/ansible/playbook.yml b/scripts/aws/uid2-operator-ami/ansible/playbook.yml index 8fb27c2d5..c62c18eee 100644 --- a/scripts/aws/uid2-operator-ami/ansible/playbook.yml +++ b/scripts/aws/uid2-operator-ami/ansible/playbook.yml @@ -70,6 +70,12 @@ requirements: /opt/uid2operator/config-server/requirements.txt virtualenv_command: 'python3 -m venv' + - name: Install requirements.txt for enclave init + ansible.builtin.copy: + src: /tmp/artifacts/requirements.txt + dest: /opt/uid2operator/requirements.txt + remote_src: yes + - name: Install starter script ansible.builtin.copy: src: /tmp/artifacts/ec2.py @@ -87,6 +93,16 @@ dest: /opt/uid2operator/confidential_compute.py remote_src: yes + - name: Install python3-pip + dnf: + name: python3-pip + state: 
present + + - name: Install dependencies from requirements.txt + pip: + requirements: /opt/uid2operator/requirements.txt + state: present + - name: Install Operator EIF ansible.builtin.copy: src: /tmp/artifacts/uid2operator.eif diff --git a/scripts/aws/uid2operator.service b/scripts/aws/uid2operator.service index 1d36b7a91..18281e2f7 100644 --- a/scripts/aws/uid2operator.service +++ b/scripts/aws/uid2operator.service @@ -8,8 +8,8 @@ RemainAfterExit=true StandardOutput=journal StandardError=journal SyslogIdentifier=uid2operator -ExecStart=/opt/uid2operator/start.sh -ExecStop=/opt/uid2operator/stop.sh +ExecStart=python3 /opt/uid2operator/ec2.py +ExecStop=python3 /opt/uid2operator/ec2.py -o stop [Install] -WantedBy=multi-user.target \ No newline at end of file +WantedBy=multi-user.target diff --git a/scripts/confidential_compute.py b/scripts/confidential_compute.py index a154e37cb..8900ade00 100644 --- a/scripts/confidential_compute.py +++ b/scripts/confidential_compute.py @@ -10,7 +10,7 @@ class ConfidentialComputeConfig(TypedDict): enclave_memory_mb: int enclave_cpu_count: int debug_mode: bool - operator_key: str + api_token: str core_base_url: str optout_base_url: str environment: str @@ -20,16 +20,6 @@ class ConfidentialCompute(ABC): def __init__(self): self.configs: ConfidentialComputeConfig = {} - @abstractmethod - def _get_secret(self, secret_identifier: str) -> ConfidentialComputeConfig: - """ - Fetches the secret from a secret store. - - Raises: - SecretNotFoundException: If the secret is not found. 
- """ - pass - def validate_environment(self): def validate_url(url_key, environment): if environment not in self.configs[url_key]: @@ -49,7 +39,7 @@ def validate_url(url_key, environment): def validate_operator_key(self): """ Validates the operator key format and its environment alignment.""" - operator_key = self.configs.get("operator_key") + operator_key = self.configs.get("api_token") if not operator_key: raise ValueError("API token is missing from the configuration.") pattern = r"^(UID2|EUID)-.\-(I|P)-\d+-\*$" @@ -62,12 +52,6 @@ def validate_operator_key(self): f"Operator key does not match the expected environment ({expected_env})." ) return True - - @staticmethod - def __resolve_hostname(url: str) -> str: - """ Resolves the hostname of a URL to an IP address.""" - hostname = urlparse(url).netloc - return socket.gethostbyname(hostname) def validate_connectivity(self) -> None: """ Validates that the core and opt-out URLs are accessible.""" @@ -84,6 +68,17 @@ def validate_connectivity(self) -> None: ) except Exception as e: raise Exception("Failed to reach the URLs.") from e + + + @abstractmethod + def _get_secret(self, secret_identifier: str) -> ConfidentialComputeConfig: + """ + Fetches the secret from a secret store. + + Raises: + SecretNotFoundException: If the secret is not found. 
+ """ + pass @abstractmethod def _setup_auxiliaries(self) -> None: @@ -100,6 +95,11 @@ def run_compute(self) -> None: """ Runs confidential computing.""" pass + @staticmethod + def __resolve_hostname(url: str) -> str: + """ Resolves the hostname of a URL to an IP address.""" + hostname = urlparse(url).netloc + return socket.gethostbyname(hostname) @staticmethod def run_command(command, seperate_process=False): From edf85f380b964fb453b47a304c3ae63826292725 Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Tue, 10 Dec 2024 11:44:23 -0800 Subject: [PATCH 06/28] Add virtual env and start it in systemd --- scripts/aws/config-server/requirements.txt | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/scripts/aws/config-server/requirements.txt b/scripts/aws/config-server/requirements.txt index c140fc7f6..8cdd5ef92 100644 --- a/scripts/aws/config-server/requirements.txt +++ b/scripts/aws/config-server/requirements.txt @@ -1,7 +1,3 @@ Flask==2.3.2 Werkzeug==3.0.3 -setuptools==70.0.0 -requests[socks]==2.32.3 -boto3==1.35.59 -urllib3==2.2.3 -PyYAML===5.4.1 \ No newline at end of file +setuptools==70.0.0 \ No newline at end of file From 3e95e4c0ba841da12f2d36f60c53c2ed63e89e4d Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Tue, 10 Dec 2024 11:47:50 -0800 Subject: [PATCH 07/28] Add virtual env and start it in systemd --- scripts/aws/ec2.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/aws/ec2.py b/scripts/aws/ec2.py index e88f424a2..1ecb457cc 100644 --- a/scripts/aws/ec2.py +++ b/scripts/aws/ec2.py @@ -58,10 +58,12 @@ def __validate_configs(self, secret): raise ValueError(f"{key} value ({secret.get(key, 0)}) exceeds the maximum allowed ({max_capacity.get(key)}).") def _get_secret(self, secret_identifier: str) -> ConfidentialComputeConfig: - secret_identifier = "uid2-config-stack-tjm-unvalidate-eif-test1" """Fetches a secret value from AWS Secrets Manager.""" region = self.__get_current_region() - client = 
boto3.client("secretsmanager", region_name=region) + try: + client = boto3.client("secretsmanager", region_name=region) + except Exception as e: + raise RuntimeError("Please specify AWS secrets as env values, or use IAM instance profile for your instance") try: secret = json.loads(client.get_secret_value(SecretId=secret_identifier)["SecretString"]) self.__validate_configs(secret) From cb70032f63fa42e95ade707c81e78fab8642b0f6 Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Tue, 10 Dec 2024 12:39:13 -0800 Subject: [PATCH 08/28] use venv like flask service --- scripts/aws/uid2-operator-ami/ansible/playbook.yml | 12 ++++-------- scripts/aws/uid2operator.service | 4 ++-- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/scripts/aws/uid2-operator-ami/ansible/playbook.yml b/scripts/aws/uid2-operator-ami/ansible/playbook.yml index c62c18eee..e6874be94 100644 --- a/scripts/aws/uid2-operator-ami/ansible/playbook.yml +++ b/scripts/aws/uid2-operator-ami/ansible/playbook.yml @@ -93,15 +93,11 @@ dest: /opt/uid2operator/confidential_compute.py remote_src: yes - - name: Install python3-pip - dnf: - name: python3-pip - state: present - - - name: Install dependencies from requirements.txt - pip: + - name: Create virtualenv for eif init + ansible.builtin.pip: + virtualenv: /opt/uid2operator/init requirements: /opt/uid2operator/requirements.txt - state: present + virtualenv_command: 'python3 -m venv' - name: Install Operator EIF ansible.builtin.copy: diff --git a/scripts/aws/uid2operator.service b/scripts/aws/uid2operator.service index 18281e2f7..56559e3c2 100644 --- a/scripts/aws/uid2operator.service +++ b/scripts/aws/uid2operator.service @@ -8,8 +8,8 @@ RemainAfterExit=true StandardOutput=journal StandardError=journal SyslogIdentifier=uid2operator -ExecStart=python3 /opt/uid2operator/ec2.py -ExecStop=python3 /opt/uid2operator/ec2.py -o stop +ExecStart=/opt/uid2operator/init/bin/python /opt/uid2operator/ec2.py +ExecStop=/opt/uid2operator/init/bin/python 
/opt/uid2operator/ec2.py -o stop [Install] WantedBy=multi-user.target From 5fe844c44f9f1c13ad1ee93f338b81c7c512bd53 Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Tue, 10 Dec 2024 12:51:26 -0800 Subject: [PATCH 09/28] use versions --- scripts/aws/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/aws/requirements.txt b/scripts/aws/requirements.txt index fa562cb02..421faba98 100644 --- a/scripts/aws/requirements.txt +++ b/scripts/aws/requirements.txt @@ -1,4 +1,4 @@ requests[socks]==2.32.3 boto3==1.35.59 -urllib3==2.2.3 -PyYAML===5.4.1 \ No newline at end of file +urllib3==1.26.20 +PyYAML===6.0.2 \ No newline at end of file From 937e7a296a7035dd11317a10a27d4deefab261ba Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Tue, 10 Dec 2024 12:58:54 -0800 Subject: [PATCH 10/28] Add URL validation --- scripts/confidential_compute.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/scripts/confidential_compute.py b/scripts/confidential_compute.py index 8900ade00..186781eab 100644 --- a/scripts/confidential_compute.py +++ b/scripts/confidential_compute.py @@ -22,19 +22,24 @@ def __init__(self): def validate_environment(self): def validate_url(url_key, environment): - if environment not in self.configs[url_key]: + """URL should include environment except in prod""" + if environment != "prod" and environment not in self.configs[url_key]: raise ValueError( f"{url_key} must match the environment. Ensure the URL includes '{environment}'." ) - + parsed_url = urlparse(self.configs[url_key]) + if parsed_url.scheme != 'https' and parsed_url.path: + raise ValueError( + f"{url_key} is invalid. Ensure {self.configs[url_key]} follows HTTPS, and doesn't have any path specified." 
+ ) + environment = self.configs["environment"] if self.configs.get("debug_mode") and environment == "prod": raise ValueError("Debug mode cannot be enabled in the production environment.") - if environment != "prod": - validate_url("core_base_url", environment) - validate_url("optout_base_url", environment) + validate_url("core_base_url", environment) + validate_url("optout_base_url", environment) def validate_operator_key(self): From 2b23ff0740aaf6b6cf09e219a68d4f6c4f2ddc11 Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Tue, 10 Dec 2024 13:14:11 -0800 Subject: [PATCH 11/28] Move validations around --- scripts/aws/ec2.py | 15 +++---- scripts/confidential_compute.py | 78 ++++++++++++++++++--------------- 2 files changed, 48 insertions(+), 45 deletions(-) diff --git a/scripts/aws/ec2.py b/scripts/aws/ec2.py index 1ecb457cc..4c32e296d 100644 --- a/scripts/aws/ec2.py +++ b/scripts/aws/ec2.py @@ -46,11 +46,7 @@ def __get_current_region(self) -> str: except requests.RequestException as e: raise RuntimeError(f"Failed to fetch region: {e}") - def __validate_configs(self, secret): - required_keys = ["api_token", "environment", "core_base_url", "optout_base_url"] - missing_keys = [key for key in required_keys if key not in secret] - if missing_keys: - raise ConfidentialComputeMissingConfigError(missing_keys) + def __validate_ec2_specific_config(self, secret): if "enclave_memory_mb" in secret or "enclave_cpu_count" in secret: max_capacity = self.__get_max_capacity() for key in ["enclave_memory_mb", "enclave_cpu_count"]: @@ -63,12 +59,12 @@ def _get_secret(self, secret_identifier: str) -> ConfidentialComputeConfig: try: client = boto3.client("secretsmanager", region_name=region) except Exception as e: - raise RuntimeError("Please specify AWS secrets as env values, or use IAM instance profile for your instance") + raise RuntimeError("Please use IAM instance profile for your instance that has permission to access Secret Manager") try: secret = 
json.loads(client.get_secret_value(SecretId=secret_identifier)["SecretString"]) - self.__validate_configs(secret) + self.__validate_ec2_specific_config(secret) return self.__add_defaults(secret) - except ClientError as e: + except ClientError as _: raise SecretNotFoundException(f"{secret_identifier} in {region}") @staticmethod @@ -144,7 +140,7 @@ def _setup_auxiliaries(self) -> None: def _validate_auxiliaries(self) -> None: """Validates auxiliary services.""" - self.validate_operator_key() + self.validate_configuration() proxy = "socks5://127.0.0.1:3306" config_url = "http://127.0.0.1:27015/getConfig" try: @@ -163,7 +159,6 @@ def run_compute(self) -> None: """Main execution flow for confidential compute.""" self._setup_auxiliaries() self._validate_auxiliaries() - self.validate_connectivity() command = [ "nitro-cli", "run-enclave", "--eif-path", "/opt/uid2operator/uid2operator.eif", diff --git a/scripts/confidential_compute.py b/scripts/confidential_compute.py index 186781eab..faf170b9c 100644 --- a/scripts/confidential_compute.py +++ b/scripts/confidential_compute.py @@ -20,7 +20,25 @@ class ConfidentialCompute(ABC): def __init__(self): self.configs: ConfidentialComputeConfig = {} - def validate_environment(self): + def validate_configuration(self): + """ Validates the paramters specified through configs/secret manager .""" + + def validate_operator_key(): + """ Validates the operator key format and its environment alignment.""" + operator_key = self.configs.get("api_token") + if not operator_key: + raise ValueError("API token is missing from the configuration.") + pattern = r"^(UID2|EUID)-.\-(I|P)-\d+-\*$" + if re.match(pattern, operator_key): + env = self.configs.get("environment", "").lower() + debug_mode = self.configs.get("debug_mode", False) + expected_env = "I" if debug_mode or env == "integ" else "P" + if operator_key.split("-")[2] != expected_env: + raise ValueError( + f"Operator key does not match the expected environment ({expected_env})." 
+ ) + return True + def validate_url(url_key, environment): """URL should include environment except in prod""" if environment != "prod" and environment not in self.configs[url_key]: @@ -33,6 +51,27 @@ def validate_url(url_key, environment): f"{url_key} is invalid. Ensure {self.configs[url_key]} follows HTTPS, and doesn't have any path specified." ) + def validate_connectivity(self) -> None: + """ Validates that the core and opt-out URLs are accessible.""" + try: + core_url = self.configs["core_base_url"] + optout_url = self.configs["optout_base_url"] + core_ip = self.__resolve_hostname(core_url) + requests.get(core_url, timeout=5) + optout_ip = self.__resolve_hostname(optout_url) + requests.get(optout_url, timeout=5) + except (requests.ConnectionError, requests.Timeout) as e: + raise Exception( + f"Failed to reach required URLs. Consider enabling {core_ip}, {optout_ip} in the egress firewall." + ) + except Exception as e: + raise Exception("Failed to reach the URLs.") from e + + required_keys = ["api_token", "environment", "core_base_url", "optout_base_url"] + missing_keys = [key for key in required_keys if key not in self.configs] + if missing_keys: + raise ConfidentialComputeMissingConfigError(missing_keys) + environment = self.configs["environment"] if self.configs.get("debug_mode") and environment == "prod": @@ -40,41 +79,10 @@ def validate_url(url_key, environment): validate_url("core_base_url", environment) validate_url("optout_base_url", environment) - - - def validate_operator_key(self): - """ Validates the operator key format and its environment alignment.""" - operator_key = self.configs.get("api_token") - if not operator_key: - raise ValueError("API token is missing from the configuration.") - pattern = r"^(UID2|EUID)-.\-(I|P)-\d+-\*$" - if re.match(pattern, operator_key): - env = self.configs.get("environment", "").lower() - debug_mode = self.configs.get("debug_mode", False) - expected_env = "I" if debug_mode or env == "integ" else "P" - if 
operator_key.split("-")[2] != expected_env: - raise ValueError( - f"Operator key does not match the expected environment ({expected_env})." - ) - return True - - def validate_connectivity(self) -> None: - """ Validates that the core and opt-out URLs are accessible.""" - try: - core_url = self.configs["core_base_url"] - optout_url = self.configs["optout_base_url"] - core_ip = self.__resolve_hostname(core_url) - requests.get(core_url, timeout=5) - optout_ip = self.__resolve_hostname(optout_url) - requests.get(optout_url, timeout=5) - except (requests.ConnectionError, requests.Timeout) as e: - raise Exception( - f"Failed to reach required URLs. Consider enabling {core_ip}, {optout_ip} in the egress firewall." - ) - except Exception as e: - raise Exception("Failed to reach the URLs.") from e + validate_operator_key() + validate_connectivity() - + @abstractmethod def _get_secret(self, secret_identifier: str) -> ConfidentialComputeConfig: """ From 44aa71faf8a5aefdc45ec9744194c30d88bb28d1 Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Tue, 10 Dec 2024 13:16:35 -0800 Subject: [PATCH 12/28] Move validations around --- scripts/aws/ec2.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/scripts/aws/ec2.py b/scripts/aws/ec2.py index 4c32e296d..fb404bd97 100644 --- a/scripts/aws/ec2.py +++ b/scripts/aws/ec2.py @@ -55,6 +55,15 @@ def __validate_ec2_specific_config(self, secret): def _get_secret(self, secret_identifier: str) -> ConfidentialComputeConfig: """Fetches a secret value from AWS Secrets Manager.""" + + def add_defaults(configs: Dict[str, any]) -> ConfidentialComputeConfig: + """Adds default values to configuration if missing.""" + default_capacity = self.__get_max_capacity() + configs.setdefault("enclave_memory_mb", default_capacity["enclave_memory_mb"]) + configs.setdefault("enclave_cpu_count", default_capacity["enclave_cpu_count"]) + configs.setdefault("debug_mode", False) + return configs + region = 
self.__get_current_region() try: client = boto3.client("secretsmanager", region_name=region) @@ -63,7 +72,7 @@ def _get_secret(self, secret_identifier: str) -> ConfidentialComputeConfig: try: secret = json.loads(client.get_secret_value(SecretId=secret_identifier)["SecretString"]) self.__validate_ec2_specific_config(secret) - return self.__add_defaults(secret) + return add_defaults(secret) except ClientError as _: raise SecretNotFoundException(f"{secret_identifier} in {region}") @@ -76,14 +85,6 @@ def __get_max_capacity(): except Exception as e: raise RuntimeError("/etc/nitro_enclaves/allocator.yaml does not have CPU, memory allocated") - def __add_defaults(self, configs: Dict[str, any]) -> ConfidentialComputeConfig: - """Adds default values to configuration if missing.""" - default_capacity = self.__get_max_capacity() - configs.setdefault("enclave_memory_mb", default_capacity["enclave_memory_mb"]) - configs.setdefault("enclave_cpu_count", default_capacity["enclave_cpu_count"]) - configs.setdefault("debug_mode", False) - return configs - def __setup_vsockproxy(self, log_level: int) -> None: """ Sets up the vsock proxy service. 
@@ -132,6 +133,7 @@ def __get_secret_name_from_userdata(self) -> str: def _setup_auxiliaries(self) -> None: """Sets up the necessary auxiliary services and configuration.""" self.configs = self._get_secret(self.__get_secret_name_from_userdata()) + self.validate_configuration() log_level = 3 if self.configs["debug_mode"] else 1 self.__setup_vsockproxy(log_level) self.__run_config_server() @@ -140,7 +142,6 @@ def _setup_auxiliaries(self) -> None: def _validate_auxiliaries(self) -> None: """Validates auxiliary services.""" - self.validate_configuration() proxy = "socks5://127.0.0.1:3306" config_url = "http://127.0.0.1:27015/getConfig" try: From 711d50b928042c16f3c0328d96a3f63f75e87bc2 Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Tue, 10 Dec 2024 13:26:59 -0800 Subject: [PATCH 13/28] Move validations around --- scripts/confidential_compute.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/scripts/confidential_compute.py b/scripts/confidential_compute.py index faf170b9c..e0bf4a172 100644 --- a/scripts/confidential_compute.py +++ b/scripts/confidential_compute.py @@ -51,14 +51,14 @@ def validate_url(url_key, environment): f"{url_key} is invalid. Ensure {self.configs[url_key]} follows HTTPS, and doesn't have any path specified." 
) - def validate_connectivity(self) -> None: + def validate_connectivity() -> None: """ Validates that the core and opt-out URLs are accessible.""" try: core_url = self.configs["core_base_url"] optout_url = self.configs["optout_base_url"] - core_ip = self.__resolve_hostname(core_url) + core_ip = socket.gethostbyname(urlparse(core_url).netloc) requests.get(core_url, timeout=5) - optout_ip = self.__resolve_hostname(optout_url) + optout_ip = socket.gethostbyname(urlparse(optout_url).netloc) requests.get(optout_url, timeout=5) except (requests.ConnectionError, requests.Timeout) as e: raise Exception( @@ -108,12 +108,6 @@ def run_compute(self) -> None: """ Runs confidential computing.""" pass - @staticmethod - def __resolve_hostname(url: str) -> str: - """ Resolves the hostname of a URL to an IP address.""" - hostname = urlparse(url).netloc - return socket.gethostbyname(hostname) - @staticmethod def run_command(command, seperate_process=False): print(f"Running command: {' '.join(command)}") From 4c694e7be0b9398e21fbb921e9c1cebedb138e32 Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Tue, 10 Dec 2024 13:39:35 -0800 Subject: [PATCH 14/28] Remove aws implemnttion from typedict --- scripts/aws/ec2.py | 10 +++++----- scripts/confidential_compute.py | 6 ++++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/scripts/aws/ec2.py b/scripts/aws/ec2.py index fb404bd97..9bb8d42c9 100644 --- a/scripts/aws/ec2.py +++ b/scripts/aws/ec2.py @@ -16,7 +16,7 @@ import yaml sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from confidential_compute import ConfidentialCompute, ConfidentialComputeConfig, ConfidentialComputeMissingConfigError, SecretNotFoundException +from confidential_compute import ConfidentialCompute, AWSConfidentialComputeConfig, SecretNotFoundException class EC2(ConfidentialCompute): @@ -46,17 +46,17 @@ def __get_current_region(self) -> str: except requests.RequestException as e: raise RuntimeError(f"Failed to fetch region: 
{e}") - def __validate_ec2_specific_config(self, secret): + def __validate_aws_specific_config(self, secret): if "enclave_memory_mb" in secret or "enclave_cpu_count" in secret: max_capacity = self.__get_max_capacity() for key in ["enclave_memory_mb", "enclave_cpu_count"]: if int(secret.get(key, 0)) > max_capacity.get(key): raise ValueError(f"{key} value ({secret.get(key, 0)}) exceeds the maximum allowed ({max_capacity.get(key)}).") - def _get_secret(self, secret_identifier: str) -> ConfidentialComputeConfig: + def _get_secret(self, secret_identifier: str) -> AWSConfidentialComputeConfig: """Fetches a secret value from AWS Secrets Manager.""" - def add_defaults(configs: Dict[str, any]) -> ConfidentialComputeConfig: + def add_defaults(configs: Dict[str, any]) -> AWSConfidentialComputeConfig: """Adds default values to configuration if missing.""" default_capacity = self.__get_max_capacity() configs.setdefault("enclave_memory_mb", default_capacity["enclave_memory_mb"]) @@ -71,7 +71,7 @@ def add_defaults(configs: Dict[str, any]) -> ConfidentialComputeConfig: raise RuntimeError("Please use IAM instance profile for your instance that has permission to access Secret Manager") try: secret = json.loads(client.get_secret_value(SecretId=secret_identifier)["SecretString"]) - self.__validate_ec2_specific_config(secret) + self.__validate_aws_specific_config(secret) return add_defaults(secret) except ClientError as _: raise SecretNotFoundException(f"{secret_identifier} in {region}") diff --git a/scripts/confidential_compute.py b/scripts/confidential_compute.py index e0bf4a172..325844513 100644 --- a/scripts/confidential_compute.py +++ b/scripts/confidential_compute.py @@ -7,13 +7,15 @@ import subprocess class ConfidentialComputeConfig(TypedDict): - enclave_memory_mb: int - enclave_cpu_count: int debug_mode: bool api_token: str core_base_url: str optout_base_url: str environment: str + +class AWSConfidentialComputeConfig(ConfidentialComputeConfig): + enclave_memory_mb: int + 
enclave_cpu_count: int class ConfidentialCompute(ABC): From 62cc490fd4f4b1c1c5bc07bd8c9a82c6da582e11 Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Tue, 10 Dec 2024 13:44:10 -0800 Subject: [PATCH 15/28] Remove aws implemnttion from typedict --- scripts/aws/ec2.py | 8 +++++++- scripts/confidential_compute.py | 4 ---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/scripts/aws/ec2.py b/scripts/aws/ec2.py index 9bb8d42c9..e5f9ebd19 100644 --- a/scripts/aws/ec2.py +++ b/scripts/aws/ec2.py @@ -16,7 +16,13 @@ import yaml sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from confidential_compute import ConfidentialCompute, AWSConfidentialComputeConfig, SecretNotFoundException +from confidential_compute import ConfidentialCompute, ConfidentialComputeConfig, SecretNotFoundException + + +class AWSConfidentialComputeConfig(ConfidentialComputeConfig): + enclave_memory_mb: int + enclave_cpu_count: int + class EC2(ConfidentialCompute): diff --git a/scripts/confidential_compute.py b/scripts/confidential_compute.py index 325844513..395c04f49 100644 --- a/scripts/confidential_compute.py +++ b/scripts/confidential_compute.py @@ -12,10 +12,6 @@ class ConfidentialComputeConfig(TypedDict): core_base_url: str optout_base_url: str environment: str - -class AWSConfidentialComputeConfig(ConfidentialComputeConfig): - enclave_memory_mb: int - enclave_cpu_count: int class ConfidentialCompute(ABC): From 5de70bec6d907e6ce65f5ae5ad6fd5727f48d944 Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Wed, 11 Dec 2024 08:27:20 -0800 Subject: [PATCH 16/28] Adding more logs --- scripts/aws/ec2.py | 26 ++++++++++++++++---------- scripts/confidential_compute.py | 9 ++++++++- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/scripts/aws/ec2.py b/scripts/aws/ec2.py index e5f9ebd19..47d219754 100644 --- a/scripts/aws/ec2.py +++ b/scripts/aws/ec2.py @@ -18,21 +18,20 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 
from confidential_compute import ConfidentialCompute, ConfidentialComputeConfig, SecretNotFoundException - class AWSConfidentialComputeConfig(ConfidentialComputeConfig): enclave_memory_mb: int enclave_cpu_count: int - class EC2(ConfidentialCompute): def __init__(self): super().__init__() + self.aws_metadata = "169.254.169.254" def __get_aws_token(self) -> str: """Fetches a temporary AWS EC2 metadata token.""" try: - token_url = "http://169.254.169.254/latest/api/token" + token_url = f"http://{self.aws_metadata}/latest/api/token" response = requests.put( token_url, headers={"X-aws-ec2-metadata-token-ttl-seconds": "3600"}, timeout=2 ) @@ -43,7 +42,7 @@ def __get_aws_token(self) -> str: def __get_current_region(self) -> str: """Fetches the current AWS region from EC2 instance metadata.""" token = self.__get_aws_token() - metadata_url = "http://169.254.169.254/latest/dynamic/instance-identity/document" + metadata_url = f"http://{self.aws_metadata}/latest/dynamic/instance-identity/document" headers = {"X-aws-ec2-metadata-token": token} try: response = requests.get(metadata_url, headers=headers, timeout=2) @@ -55,12 +54,14 @@ def __get_current_region(self) -> str: def __validate_aws_specific_config(self, secret): if "enclave_memory_mb" in secret or "enclave_cpu_count" in secret: max_capacity = self.__get_max_capacity() + for key in ["enclave_memory_mb", "enclave_cpu_count"]: if int(secret.get(key, 0)) > max_capacity.get(key): raise ValueError(f"{key} value ({secret.get(key, 0)}) exceeds the maximum allowed ({max_capacity.get(key)}).") + def _get_secret(self, secret_identifier: str) -> AWSConfidentialComputeConfig: - """Fetches a secret value from AWS Secrets Manager.""" + """Fetches a secret value from AWS Secrets Manager and adds defaults""" def add_defaults(configs: Dict[str, any]) -> AWSConfidentialComputeConfig: """Adds default values to configuration if missing.""" @@ -71,6 +72,7 @@ def add_defaults(configs: Dict[str, any]) -> AWSConfidentialComputeConfig: return 
configs region = self.__get_current_region() + print(f"Running in {region}") try: client = boto3.client("secretsmanager", region_name=region) except Exception as e: @@ -124,7 +126,7 @@ def __run_socks_proxy(self) -> None: def __get_secret_name_from_userdata(self) -> str: """Extracts the secret name from EC2 user data.""" token = self.__get_aws_token() - user_data_url = "http://169.254.169.254/latest/user-data" + user_data_url = f"http://{self.aws_metadata}/latest/user-data" response = requests.get(user_data_url, headers={"X-aws-ec2-metadata-token": token}) user_data = response.text @@ -137,9 +139,7 @@ def __get_secret_name_from_userdata(self) -> str: return match.group(1) if match else default_name def _setup_auxiliaries(self) -> None: - """Sets up the necessary auxiliary services and configuration.""" - self.configs = self._get_secret(self.__get_secret_name_from_userdata()) - self.validate_configuration() + """Sets up the vsock tunnel, socks proxy and flask server""" log_level = 3 if self.configs["debug_mode"] else 1 self.__setup_vsockproxy(log_level) self.__run_config_server() @@ -147,7 +147,7 @@ def _setup_auxiliaries(self) -> None: time.sleep(5) #TODO: Change to while loop if required. 
def _validate_auxiliaries(self) -> None: - """Validates auxiliary services.""" + """Validates connection to flask server direct and through socks proxy.""" proxy = "socks5://127.0.0.1:3306" config_url = "http://127.0.0.1:27015/getConfig" try: @@ -161,9 +161,14 @@ def _validate_auxiliaries(self) -> None: response.raise_for_status() except requests.RequestException as e: raise RuntimeError(f"Cannot connect to config server via SOCKS proxy: {e}") + print("Connectivity check to config server passes") def run_compute(self) -> None: """Main execution flow for confidential compute.""" + secret_manager_key = self.__get_secret_name_from_userdata() + self.configs = self._get_secret(secret_manager_key) + print(f"Fetched configs from {secret_manager_key}") + self.validate_configuration() self._setup_auxiliaries() self._validate_auxiliaries() command = [ @@ -175,6 +180,7 @@ def run_compute(self) -> None: "--enclave-name", "uid2operator" ] if self.configs["debug_mode"]: + print("Running in debug_mode") command += ["--debug-mode", "--attach-console"] self.run_command(command) diff --git a/scripts/confidential_compute.py b/scripts/confidential_compute.py index 395c04f49..1769da0af 100644 --- a/scripts/confidential_compute.py +++ b/scripts/confidential_compute.py @@ -35,7 +35,9 @@ def validate_operator_key(): raise ValueError( f"Operator key does not match the expected environment ({expected_env})." ) - return True + print("Validated operator key matches environment") + else: + print("Skipping operator key validation") def validate_url(url_key, environment): """URL should include environment except in prod""" @@ -48,6 +50,8 @@ def validate_url(url_key, environment): raise ValueError( f"{url_key} is invalid. Ensure {self.configs[url_key]} follows HTTPS, and doesn't have any path specified." 
) + print(f"Validated {self.configs[url_key]} matches other config parameters") + def validate_connectivity() -> None: """ Validates that the core and opt-out URLs are accessible.""" @@ -56,8 +60,10 @@ def validate_connectivity() -> None: optout_url = self.configs["optout_base_url"] core_ip = socket.gethostbyname(urlparse(core_url).netloc) requests.get(core_url, timeout=5) + print(f"Validated connectivity to {core_url}") optout_ip = socket.gethostbyname(urlparse(optout_url).netloc) requests.get(optout_url, timeout=5) + print(f"Validated connectivity to {optout_url}") except (requests.ConnectionError, requests.Timeout) as e: raise Exception( f"Failed to reach required URLs. Consider enabling {core_ip}, {optout_ip} in the egress firewall." @@ -79,6 +85,7 @@ def validate_connectivity() -> None: validate_url("optout_base_url", environment) validate_operator_key() validate_connectivity() + print("Completed static validation of confidential compute config values") @abstractmethod From 77f1f4a64823de682c86cbe04b264a1ba9b3229a Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Wed, 11 Dec 2024 08:33:48 -0800 Subject: [PATCH 17/28] Adding min capacity --- scripts/aws/ec2.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/aws/ec2.py b/scripts/aws/ec2.py index 47d219754..f0cb59356 100644 --- a/scripts/aws/ec2.py +++ b/scripts/aws/ec2.py @@ -54,12 +54,13 @@ def __get_current_region(self) -> str: def __validate_aws_specific_config(self, secret): if "enclave_memory_mb" in secret or "enclave_cpu_count" in secret: max_capacity = self.__get_max_capacity() - + min_capacity = {"enclave_memory_mb": 10000, "enclave_cpu_count" : 4 } for key in ["enclave_memory_mb", "enclave_cpu_count"]: if int(secret.get(key, 0)) > max_capacity.get(key): raise ValueError(f"{key} value ({secret.get(key, 0)}) exceeds the maximum allowed ({max_capacity.get(key)}).") + if min_capacity.get(key) > int(secret.get(key, 10**9)): + raise ValueError(f"{key} value ({secret.get(key, 
0)}) needs to be higher than the minimum required ({min_capacity.get(key)}).") - def _get_secret(self, secret_identifier: str) -> AWSConfidentialComputeConfig: """Fetches a secret value from AWS Secrets Manager and adds defaults""" From a4241fc2745cb37d721961aa3e4b7de6758f1e7f Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Wed, 11 Dec 2024 08:46:25 -0800 Subject: [PATCH 18/28] Loop every sec for 10sec for confg server to be up --- scripts/aws/ec2.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/scripts/aws/ec2.py b/scripts/aws/ec2.py index f0cb59356..455541aef 100644 --- a/scripts/aws/ec2.py +++ b/scripts/aws/ec2.py @@ -145,17 +145,25 @@ def _setup_auxiliaries(self) -> None: self.__setup_vsockproxy(log_level) self.__run_config_server() self.__run_socks_proxy() - time.sleep(5) #TODO: Change to while loop if required. def _validate_auxiliaries(self) -> None: """Validates connection to flask server direct and through socks proxy.""" proxy = "socks5://127.0.0.1:3306" config_url = "http://127.0.0.1:27015/getConfig" try: - response = requests.get(config_url) + for attempt in range(10): + try: + response = requests.get(config_url) + print("Config server is reachable") + break + except requests.exceptions.ConnectionError as e: + print(f"Connecting to config server, attempt {attempt + 1} failed with ConnectionError: {e}") + time.sleep(1) + else: + raise RuntimeError(f"Config server unreachable") response.raise_for_status() except requests.RequestException as e: - raise RuntimeError(f"Config server unreachable: {e}") + raise RuntimeError(f"Failed to get config from config server: {e}") proxies = {"http": proxy, "https": proxy} try: response = requests.get(config_url, proxies=proxies) From 0bff45688e66bc55720974f303d9187db62a98fb Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Wed, 11 Dec 2024 10:10:12 -0800 Subject: [PATCH 19/28] Fix regex --- scripts/confidential_compute.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/scripts/confidential_compute.py b/scripts/confidential_compute.py index 1769da0af..5e153d1d5 100644 --- a/scripts/confidential_compute.py +++ b/scripts/confidential_compute.py @@ -26,7 +26,7 @@ def validate_operator_key(): operator_key = self.configs.get("api_token") if not operator_key: raise ValueError("API token is missing from the configuration.") - pattern = r"^(UID2|EUID)-.\-(I|P)-\d+-\*$" + pattern = r"^(UID2|EUID)-.\-(I|P)-\d+-.*$" if re.match(pattern, operator_key): env = self.configs.get("environment", "").lower() debug_mode = self.configs.get("debug_mode", False) From e669887fc633af3a7aa9138394cce620a66b109a Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Wed, 11 Dec 2024 10:53:01 -0800 Subject: [PATCH 20/28] validate after default --- scripts/aws/ec2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/aws/ec2.py b/scripts/aws/ec2.py index 455541aef..737297eee 100644 --- a/scripts/aws/ec2.py +++ b/scripts/aws/ec2.py @@ -79,9 +79,9 @@ def add_defaults(configs: Dict[str, any]) -> AWSConfidentialComputeConfig: except Exception as e: raise RuntimeError("Please use IAM instance profile for your instance that has permission to access Secret Manager") try: - secret = json.loads(client.get_secret_value(SecretId=secret_identifier)["SecretString"]) + secret = add_defaults(json.loads(client.get_secret_value(SecretId=secret_identifier)["SecretString"])) self.__validate_aws_specific_config(secret) - return add_defaults(secret) + return secret except ClientError as _: raise SecretNotFoundException(f"{secret_identifier} in {region}") From 85fc3e7ae1c87fd2ea84a82e0805995ab84fec8d Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Wed, 11 Dec 2024 14:41:11 -0800 Subject: [PATCH 21/28] Add tested min values for capacity --- scripts/aws/ec2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/aws/ec2.py b/scripts/aws/ec2.py index 737297eee..3dc33fdba 100644 --- a/scripts/aws/ec2.py +++ 
b/scripts/aws/ec2.py @@ -54,7 +54,7 @@ def __get_current_region(self) -> str: def __validate_aws_specific_config(self, secret): if "enclave_memory_mb" in secret or "enclave_cpu_count" in secret: max_capacity = self.__get_max_capacity() - min_capacity = {"enclave_memory_mb": 10000, "enclave_cpu_count" : 4 } + min_capacity = {"enclave_memory_mb": 11000, "enclave_cpu_count" : 2 } for key in ["enclave_memory_mb", "enclave_cpu_count"]: if int(secret.get(key, 0)) > max_capacity.get(key): raise ValueError(f"{key} value ({secret.get(key, 0)}) exceeds the maximum allowed ({max_capacity.get(key)}).") From d7b24c7b750de9f4b9bf4305a06c0c613cd85ee8 Mon Sep 17 00:00:00 2001 From: Release Workflow Date: Thu, 12 Dec 2024 00:04:57 +0000 Subject: [PATCH 22/28] [CI Pipeline] Released Snapshot version: 5.43.1-alpha-93-SNAPSHOT --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d817ec3dd..f3b417a59 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.uid2 uid2-operator - 5.43.0 + 5.43.1-alpha-93-SNAPSHOT UTF-8 From 4499dcf181a50e054bf260c59f6988c66b0e4134 Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Wed, 11 Dec 2024 17:47:30 -0800 Subject: [PATCH 23/28] Add to build eif stage --- .github/actions/build_aws_eif/action.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/actions/build_aws_eif/action.yaml b/.github/actions/build_aws_eif/action.yaml index f17523a44..08e6d6604 100644 --- a/.github/actions/build_aws_eif/action.yaml +++ b/.github/actions/build_aws_eif/action.yaml @@ -96,8 +96,9 @@ runs: cp ${{ steps.buildFolder.outputs.BUILD_FOLDER }}/identity_scope.txt ${ARTIFACTS_OUTPUT_DIR}/ cp ${{ steps.buildFolder.outputs.BUILD_FOLDER }}/version_number.txt ${ARTIFACTS_OUTPUT_DIR}/ - cp ./scripts/aws/start.sh ${ARTIFACTS_OUTPUT_DIR}/ - cp ./scripts/aws/stop.sh ${ARTIFACTS_OUTPUT_DIR}/ + cp ./scripts/aws/ec2.py ${ARTIFACTS_OUTPUT_DIR}/ + cp ./scripts/confidential_compute.py ${ARTIFACTS_OUTPUT_DIR}/ + 
cp ./scripts/aws/requirements.txt ${ARTIFACTS_OUTPUT_DIR}/ cp ./scripts/aws/proxies.host.yaml ${ARTIFACTS_OUTPUT_DIR}/ cp ./scripts/aws/sockd.conf ${ARTIFACTS_OUTPUT_DIR}/ cp ./scripts/aws/uid2operator.service ${ARTIFACTS_OUTPUT_DIR}/ From 3dd967d045d97d4031267002eb21a05bf80edad2 Mon Sep 17 00:00:00 2001 From: Release Workflow Date: Thu, 12 Dec 2024 01:48:26 +0000 Subject: [PATCH 24/28] [CI Pipeline] Released Snapshot version: 5.43.2-alpha-94-SNAPSHOT --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index f3b417a59..da0791d9e 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.uid2 uid2-operator - 5.43.1-alpha-93-SNAPSHOT + 5.43.2-alpha-94-SNAPSHOT UTF-8 From 45b290839982dddc4f37ae7288e8d8ff31ed2a08 Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Thu, 12 Dec 2024 13:22:02 -0800 Subject: [PATCH 25/28] Dont check for enclave, kill all --- scripts/aws/ec2.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/scripts/aws/ec2.py b/scripts/aws/ec2.py index 3dc33fdba..04cc1f788 100644 --- a/scripts/aws/ec2.py +++ b/scripts/aws/ec2.py @@ -196,14 +196,7 @@ def run_compute(self) -> None: def cleanup(self) -> None: """Terminates the Nitro Enclave and auxiliary processes.""" try: - describe_output = subprocess.check_output(["nitro-cli", "describe-enclaves"], text=True) - enclaves = json.loads(describe_output) - enclave_id = enclaves[0].get("EnclaveID") if enclaves else None - if enclave_id: - self.run_command(["nitro-cli", "terminate-enclave", "--enclave-id", enclave_id]) - print(f"Terminated enclave with ID: {enclave_id}") - else: - print("No active enclaves found.") + self.run_command(["nitro-cli", "terminate-enclave", "--all"]) self.__kill_auxiliaries() except subprocess.SubprocessError as e: raise (f"Error during cleanup: {e}") From d890e5dc127c3d404864b8a600914eeaa0d3d8b7 Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Thu, 12 Dec 2024 14:20:46 -0800 Subject: [PATCH 26/28] Change version on 
ami build --- .github/workflows/publish-aws-nitro-eif.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/publish-aws-nitro-eif.yaml b/.github/workflows/publish-aws-nitro-eif.yaml index 8783f6829..31bd87fb4 100644 --- a/.github/workflows/publish-aws-nitro-eif.yaml +++ b/.github/workflows/publish-aws-nitro-eif.yaml @@ -70,7 +70,7 @@ jobs: steps: - name: Build UID2 AWS EIF id: build_uid2_eif - uses: IABTechLab/uid2-operator/.github/actions/build_aws_eif@main + uses: IABTechLab/uid2-operator/.github/actions/build_aws_eif@abu-UID2-4555-EC2-improvements with: identity_scope: uid2 artifacts_base_output_dir: ${{ env.ARTIFACTS_BASE_OUTPUT_DIR }}/uid2 @@ -106,7 +106,7 @@ jobs: steps: - name: Build EUID AWS EIF id: build_euid_eif - uses: IABTechLab/uid2-operator/.github/actions/build_aws_eif@main + uses: IABTechLab/uid2-operator/.github/actions/build_aws_eif@abu-UID2-4555-EC2-improvements with: identity_scope: euid artifacts_base_output_dir: ${{ env.ARTIFACTS_BASE_OUTPUT_DIR }}/euid From 8eeaf9a2d16bd59bee973ec494a56ff5c8d44183 Mon Sep 17 00:00:00 2001 From: Release Workflow Date: Fri, 13 Dec 2024 01:25:55 +0000 Subject: [PATCH 27/28] [CI Pipeline] Released Snapshot version: 5.43.3-alpha-100-SNAPSHOT --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index da0791d9e..ba3801aaf 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.uid2 uid2-operator - 5.43.2-alpha-94-SNAPSHOT + 5.43.3-alpha-100-SNAPSHOT UTF-8 From eb8955cd4021daab947caf47cdaa5b5e7f002ad8 Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Fri, 13 Dec 2024 12:01:33 -0800 Subject: [PATCH 28/28] Use AuxilaryConfig to store and return URLs --- scripts/aws/ec2.py | 65 +++++++++++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 21 deletions(-) diff --git a/scripts/aws/ec2.py b/scripts/aws/ec2.py index 04cc1f788..b969eb14e 100644 --- a/scripts/aws/ec2.py +++ b/scripts/aws/ec2.py @@ -22,18 +22,42 @@ class 
AWSConfidentialComputeConfig(ConfidentialComputeConfig): enclave_memory_mb: int enclave_cpu_count: int +class AuxiliaryConfig: + FLASK_PORT: str = "27015" + LOCALHOST: str = "127.0.0.1" + AWS_METADATA: str = "169.254.169.254" + + @classmethod + def get_socks_url(cls) -> str: + return f"socks5://{cls.LOCALHOST}:3306" + + @classmethod + def get_config_url(cls) -> str: + return f"http://{cls.LOCALHOST}:{cls.FLASK_PORT}/getConfig" + + @classmethod + def get_user_data_url(cls) -> str: + return f"http://{cls.AWS_METADATA}/latest/user-data" + + @classmethod + def get_token_url(cls) -> str: + return f"http://{cls.AWS_METADATA}/latest/api/token" + + @classmethod + def get_meta_url(cls) -> str: + return f"http://{cls.AWS_METADATA}/latest/dynamic/instance-identity/document" + + class EC2(ConfidentialCompute): def __init__(self): super().__init__() - self.aws_metadata = "169.254.169.254" def __get_aws_token(self) -> str: """Fetches a temporary AWS EC2 metadata token.""" try: - token_url = f"http://{self.aws_metadata}/latest/api/token" response = requests.put( - token_url, headers={"X-aws-ec2-metadata-token-ttl-seconds": "3600"}, timeout=2 + AuxiliaryConfig.get_token_url(), headers={"X-aws-ec2-metadata-token-ttl-seconds": "3600"}, timeout=2 ) return response.text except requests.RequestException as e: @@ -42,10 +66,9 @@ def __get_aws_token(self) -> str: def __get_current_region(self) -> str: """Fetches the current AWS region from EC2 instance metadata.""" token = self.__get_aws_token() - metadata_url = f"http://{self.aws_metadata}/latest/dynamic/instance-identity/document" headers = {"X-aws-ec2-metadata-token": token} try: - response = requests.get(metadata_url, headers=headers, timeout=2) + response = requests.get(AuxiliaryConfig.get_meta_url(), headers=headers, timeout=2) response.raise_for_status() return response.json()["region"] except requests.RequestException as e: @@ -114,7 +137,7 @@ def __run_config_server(self) -> None: with open(config_path, 'w') as config_file:
json.dump(self.configs, config_file) os.chdir("/opt/uid2operator/config-server") - command = ["./bin/flask", "run", "--host", "127.0.0.1", "--port", "27015"] + command = ["./bin/flask", "run", "--host", AuxiliaryConfig.LOCALHOST, "--port", AuxiliaryConfig.FLASK_PORT] self.run_command(command, seperate_process=True) def __run_socks_proxy(self) -> None: @@ -127,8 +150,7 @@ def __run_socks_proxy(self) -> None: def __get_secret_name_from_userdata(self) -> str: """Extracts the secret name from EC2 user data.""" token = self.__get_aws_token() - user_data_url = f"http://{self.aws_metadata}/latest/user-data" - response = requests.get(user_data_url, headers={"X-aws-ec2-metadata-token": token}) + response = requests.get(AuxiliaryConfig.get_user_data_url(), headers={"X-aws-ec2-metadata-token": token}) user_data = response.text with open("/opt/uid2operator/identity_scope.txt") as file: @@ -148,12 +170,10 @@ def _setup_auxiliaries(self) -> None: def _validate_auxiliaries(self) -> None: """Validates connection to flask server direct and through socks proxy.""" - proxy = "socks5://127.0.0.1:3306" - config_url = "http://127.0.0.1:27015/getConfig" try: for attempt in range(10): try: - response = requests.get(config_url) + response = requests.get(AuxiliaryConfig.get_config_url()) print("Config server is reachable") break except requests.exceptions.ConnectionError as e: @@ -164,22 +184,15 @@ def _validate_auxiliaries(self) -> None: response.raise_for_status() except requests.RequestException as e: raise RuntimeError(f"Failed to get config from config server: {e}") - proxies = {"http": proxy, "https": proxy} + proxies = {"http": AuxiliaryConfig.get_socks_url(), "https": AuxiliaryConfig.get_socks_url()} try: - response = requests.get(config_url, proxies=proxies) + response = requests.get(AuxiliaryConfig.get_config_url(), proxies=proxies) response.raise_for_status() except requests.RequestException as e: raise RuntimeError(f"Cannot connect to config server via SOCKS proxy: {e}") 
print("Connectivity check to config server passes") - def run_compute(self) -> None: - """Main execution flow for confidential compute.""" - secret_manager_key = self.__get_secret_name_from_userdata() - self.configs = self._get_secret(secret_manager_key) - print(f"Fetched configs from {secret_manager_key}") - self.validate_configuration() - self._setup_auxiliaries() - self._validate_auxiliaries() + def __run_nitro_enclave(self): command = [ "nitro-cli", "run-enclave", "--eif-path", "/opt/uid2operator/uid2operator.eif", @@ -193,6 +206,16 @@ def run_compute(self) -> None: command += ["--debug-mode", "--attach-console"] self.run_command(command) + def run_compute(self) -> None: + """Main execution flow for confidential compute.""" + secret_manager_key = self.__get_secret_name_from_userdata() + self.configs = self._get_secret(secret_manager_key) + print(f"Fetched configs from {secret_manager_key}") + self.validate_configuration() + self._setup_auxiliaries() + self._validate_auxiliaries() + self.__run_nitro_enclave() + def cleanup(self) -> None: """Terminates the Nitro Enclave and auxiliary processes.""" try: