Skip to content

Commit

Permalink
Refactor dependencies for gateway (#251)
Browse files Browse the repository at this point in the history
  • Loading branch information
parasj authored Apr 6, 2022
1 parent 6bb7806 commit 7759318
Show file tree
Hide file tree
Showing 16 changed files with 85 additions and 65 deletions.
6 changes: 3 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@ RUN (echo 'net.ipv4.ip_local_port_range = 12000 65535' >> /etc/sysctl.conf) \

# install apt packages
RUN --mount=type=cache,target=/var/cache/apt apt update \
&& apt-get install --no-install-recommends -y git wget ca-certificates build-essential graphviz \
&& apt-get install --no-install-recommends -y ca-certificates build-essential \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# install gateway
COPY scripts/requirements-gateway.txt /tmp/requirements-gateway.txt
RUN --mount=type=cache,target=/root/.cache/pip pip3 install --no-cache-dir --compile -r /tmp/requirements-gateway.txt && rm -r /tmp/requirements-gateway.txt
RUN --mount=type=cache,target=/root/.cache/pip pip3 install --no-cache-dir -r /tmp/requirements-gateway.txt && rm -r /tmp/requirements-gateway.txt

WORKDIR /pkg
COPY . .
RUN pip3 install -e .
RUN pip3 install --no-dependencies -e ".[gateway]"

CMD ["python3", "skylark/gateway/gateway_daemon.py"]
3 changes: 2 additions & 1 deletion scripts/experiment_paras.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ cp ${filename} data/results/${experiment}
skylark replicate-json ${filename} \
--source-bucket $src_bucket \
--dest-bucket $dest_bucket \
--key-prefix ${key_prefix} |& tee data/results/${experiment}/obj-store-logs.txt
--src-key-prefix ${key_prefix} \
--dest-key-prefix ${key_prefix} |& tee data/results/${experiment}/obj-store-logs.txt
tail -1 data/results/${experiment}/obj-store-logs.txt
echo ${experiment}
18 changes: 10 additions & 8 deletions scripts/requirements-gateway.txt
Original file line number Diff line number Diff line change
@@ -1,24 +1,26 @@
# cloud dependencies
awscrt
azure-identity
azure-mgmt-compute
azure-mgmt-network
azure-mgmt-resource
# removed from gateway due to large size
# azure-mgmt-compute
# azure-mgmt-network
# azure-mgmt-resource
azure-mgmt-storage
azure-mgmt-authorization
azure-storage-blob>=12.0.0
boto3
click>=7.1.2
flask
google-api-python-client
google-auth
google-cloud-compute
google-cloud-storage
grpcio-status>=1.33.2
numpy
# shared dependencies
cachetools
oslo.concurrency
paramiko
setproctitle
termcolor
testresources
tqdm
# gateway dependencies
flask
pyopenssl
werkzeug
34 changes: 20 additions & 14 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
packages=["skylark"],
python_requires=">=3.7",
install_requires=[
# cloud integrations
"awscrt",
"azure-identity",
"azure-mgmt-compute",
Expand All @@ -16,36 +17,41 @@
"azure-mgmt-authorization",
"azure-storage-blob>=12.0.0",
"boto3",
"click>=7.1.2",
"flask",
"google-api-python-client",
"google-auth",
"google-cloud-compute",
"google-cloud-storage",
"grpcio-status>=1.33.2",
"numpy",
# client dependencies
"click",
"pandas",
"questionary",
"typer",
# shared dependencies
"cachetools",
"oslo.concurrency",
"paramiko",
"pyopenssl",
"setproctitle",
"termcolor",
"testresources",
"tqdm",
"werkzeug",
],
extras_require={
"all": [
"cachetools",
"click",
"solver": [
"cvxopt",
"cvxpy",
"graphviz",
"matplotlib",
"pandas",
"questionary",
"typer",
"numpy",
],
"gateway": [
"flask",
"pyopenssl",
"werkzeug",
],
"experiments": [
"matplotlib",
"numpy",
"ray",
],
"experiments": ["ray"],
"test": ["black", "ipython", "jupyter_console", "pytest", "pytype"],
},
entry_points={"console_scripts": ["skylark=skylark.cli.cli:app"]},
Expand Down
2 changes: 2 additions & 0 deletions skylark/cli/cli_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,9 +355,11 @@ def load_aws_config(config: SkylarkConfig) -> SkylarkConfig:
config.aws_enabled = True
return config


def create_aws_region_config(config):
AWSAuthentication.save_region_config(config)


def load_azure_config(config: SkylarkConfig, force_init: bool = False) -> SkylarkConfig:
if force_init:
typer.secho(" Azure credentials will be re-initialized", fg="red")
Expand Down
22 changes: 10 additions & 12 deletions skylark/compute/aws/aws_auth.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
from pydoc import describe
import threading
from typing import Optional
import typer

import boto3

from skylark.config import SkylarkConfig
from skylark import config_path
from skylark import aws_config_path
from skylark import aws_config_path


class AWSAuthentication:
Expand All @@ -19,7 +17,7 @@ def __init__(self, config: Optional[SkylarkConfig] = None, access_key: Optional[
self.config = config
else:
self.config = SkylarkConfig.load_config(config_path)

if access_key and secret_key:
self.config_mode = "manual"
self._access_key = access_key
Expand All @@ -28,29 +26,29 @@ def __init__(self, config: Optional[SkylarkConfig] = None, access_key: Optional[
self.config_mode = "iam_inferred"
self._access_key = None
self._secret_key = None

@staticmethod
def save_region_config(config):
with open(aws_config_path, "w") as f:
if config.aws_enabled == False:
f.write("")
return
region_list = []
describe_regions = boto3.client('ec2', region_name="us-east-1").describe_regions()
for region in describe_regions['Regions']:
if region['OptInStatus'] == 'opt-in-not-required' or region['OptInStatus'] == 'opted-in':
region_text = region['Endpoint']
region_name = region_text[region_text.find('.') + 1 :region_text.find(".amazon")]
describe_regions = boto3.client("ec2", region_name="us-east-1").describe_regions()
for region in describe_regions["Regions"]:
if region["OptInStatus"] == "opt-in-not-required" or region["OptInStatus"] == "opted-in":
region_text = region["Endpoint"]
region_name = region_text[region_text.find(".") + 1 : region_text.find(".amazon")]
region_list.append(region_name)
f.write("\n".join(region_list))
typer.secho(f" AWS region config file saved to {aws_config_path}", fg="green")
print(f" AWS region config file saved to {aws_config_path}")

@staticmethod
def get_region_config():
try:
f = open(aws_config_path, "r")
except FileNotFoundError:
typer.secho(" No AWS config detected! Consquently, the AWS region list is empty. Run 'skylark init' to remedy this.", fg="red")
print(" No AWS config detected! Consquently, the AWS region list is empty. Run 'skylark init' to remedy this.")
return []
region_list = []
for region in f.read().split("\n"):
Expand Down
8 changes: 6 additions & 2 deletions skylark/compute/aws/aws_cloud_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,8 @@
from pathlib import Path

import botocore
import pandas as pd
from skylark.compute.aws.aws_auth import AWSAuthentication
from skylark.utils import logger
import typer
from skylark import key_root

from oslo_concurrency import lockutils
Expand All @@ -17,6 +15,12 @@
from skylark.compute.cloud_providers import CloudProvider
from skylark.utils.utils import retry_backoff, wait_for

try:
import pandas as pd
except ImportError:
pd = None
logger.warning("pandas not installed, will not be able to load transfer costs")


class AWSCloudProvider(CloudProvider):
def __init__(self):
Expand Down
22 changes: 19 additions & 3 deletions skylark/compute/azure/azure_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
import threading
from typing import Optional
from azure.identity import DefaultAzureCredential
from azure.mgmt.compute import ComputeManagementClient
from azure.mgmt.network import NetworkManagementClient
from azure.mgmt.resource import ResourceManagementClient
from azure.mgmt.authorization import AuthorizationManagementClient
from azure.mgmt.storage import StorageManagementClient
from azure.storage.blob import BlobServiceClient, ContainerClient
Expand All @@ -15,6 +12,22 @@
from skylark.config import SkylarkConfig
from skylark import config_path

# optional imports due to large package size
try:
from azure.mgmt.network import NetworkManagementClient
except ImportError:
NetworkManagementClient = None

try:
from azure.mgmt.compute import ComputeManagementClient
except ImportError:
ComputeManagementClient = None

try:
from azure.mgmt.resource import ResourceManagementClient
except ImportError:
ResourceManagementClient = None


class AzureAuthentication:
__cached_credentials = threading.local()
Expand Down Expand Up @@ -55,12 +68,15 @@ def get_token(self, resource: str):
return self.credential.get_token(resource)

def get_compute_client(self):
assert ComputeManagementClient is not None, "ComputeManagementClient is not installed"
return ComputeManagementClient(self.credential, self.subscription_id)

def get_resource_client(self):
assert ResourceManagementClient is not None, "ResourceManagementClient is not installed"
return ResourceManagementClient(self.credential, self.subscription_id)

def get_network_client(self):
assert NetworkManagementClient is not None, "NetworkManagementClient is not installed"
return NetworkManagementClient(self.credential, self.subscription_id)

def get_authorization_client(self):
Expand Down
11 changes: 5 additions & 6 deletions skylark/compute/azure/azure_cloud_provider.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
from multiprocessing import BoundedSemaphore
import os
import uuid
from multiprocessing import BoundedSemaphore
from pathlib import Path
from typing import List, Optional

import paramiko
from skylark.compute.azure.azure_auth import AzureAuthentication
from skylark.utils import logger
from skylark import key_root
from skylark.compute.azure.azure_auth import AzureAuthentication
from skylark.compute.azure.azure_server import AzureServer
from skylark.compute.cloud_providers import CloudProvider
from azure.mgmt.authorization.models import RoleAssignmentCreateParameters
from skylark.utils import logger
from skylark.utils.utils import Timer, do_parallel

from azure.mgmt.authorization.models import RoleAssignmentCreateParameters
from azure.mgmt.compute.models import ResourceIdentityType

from skylark.utils.utils import Timer, do_parallel


class AzureCloudProvider(CloudProvider):
def __init__(self, key_root=key_root / "azure"):
Expand Down
12 changes: 6 additions & 6 deletions skylark/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ class SkylarkConfig:
@staticmethod
def default_config() -> "SkylarkConfig":
return SkylarkConfig(
aws_enabled=False,
azure_enabled=False,
gcp_enabled=False,
azure_subscription_id=None,
gcp_project_id=None,
)
aws_enabled=False,
azure_enabled=False,
gcp_enabled=False,
azure_subscription_id=None,
gcp_project_id=None,
)

@staticmethod
def load_config(path) -> "SkylarkConfig":
Expand Down
File renamed without changes.
2 changes: 0 additions & 2 deletions skylark/gateway/gateway_daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from typing import Dict


import setproctitle
from skylark.utils import logger
from skylark import MB, print_header
from skylark.chunk import ChunkState
Expand Down Expand Up @@ -47,7 +46,6 @@ def __init__(self, region: str, outgoing_ports: Dict[str, int], chunk_dir: PathL
logger.info(f"[gateway_daemon] API started at {self.api_server.url}")

def run(self):
setproctitle.setproctitle(f"skylark-gateway-daemon")
exit_flag = Event()

def exit_handler(signum, frame):
Expand Down
2 changes: 0 additions & 2 deletions skylark/gateway/gateway_obj_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from multiprocessing import Event, Manager, Process, Value
from typing import Dict, Optional

import setproctitle
from skylark.gateway.chunk_store import ChunkStore
from skylark.utils import logger
from skylark.chunk import ChunkRequest
Expand Down Expand Up @@ -61,7 +60,6 @@ def stop_workers(self):

def worker_loop(self, worker_id: int):
# todo should this use processes instead of threads?
setproctitle.setproctitle(f"skylark-gateway-obj-store:{worker_id}")
self.worker_id = worker_id

while not self.exit_flags[worker_id].is_set():
Expand Down
4 changes: 1 addition & 3 deletions skylark/gateway/gateway_receiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,11 @@
from multiprocessing import Event, Process, Value
from typing import Tuple

import setproctitle
from skylark.utils import logger
from skylark import GB, MB
from skylark.chunk import WireProtocolHeader
from skylark.gateway.chunk_store import ChunkStore
from skylark.utils.cert import generate_self_signed_certificate
from skylark.gateway.cert import generate_self_signed_certificate
from skylark.utils.utils import Timer


Expand Down Expand Up @@ -56,7 +55,6 @@ def signal_handler(signal, frame):
exit_flag.value = 1

signal.signal(signal.SIGINT, signal_handler)
setproctitle.setproctitle(f"skylark-gateway-receiver:{socket_port}")

sock.listen()
if self.ssl_context is not None:
Expand Down
2 changes: 0 additions & 2 deletions skylark/gateway/gateway_sender.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from typing import Dict, List, Optional

import requests
import setproctitle
from skylark.utils import logger
from skylark import MB
from skylark.chunk import ChunkRequest
Expand Down Expand Up @@ -57,7 +56,6 @@ def stop_workers(self):
self.processes = []

def worker_loop(self, worker_id: int, dest_ip: str):
setproctitle.setproctitle(f"skylark-gateway-sender:{worker_id}")
self.worker_id = worker_id

while not self.exit_flags[worker_id].is_set():
Expand Down
Loading

0 comments on commit 7759318

Please sign in to comment.