Skip to content

Commit

Permalink
Refactor sagemaker-custom-kernel module to use Pydantic (#106)
Browse files Browse the repository at this point in the history
* chore: Refactor sagemaker-custom-kernel to use pydantic for inputs

* update requirements.txt

* add tags

* fix tests

* update changelog

* fix mount_path parameter

* fix unit tests
  • Loading branch information
LeonLuttenberger authored Jun 3, 2024
1 parent 323cf1a commit c9f3ee7
Show file tree
Hide file tree
Showing 10 changed files with 161 additions and 112 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `sagemaker-studio`
- `sagemaker-endpoint`
- `sagemaker-templates-service-catalog`
- `sagemaker-custom-kernel`
- `qna-rag`
- add CDK nag to `qna-rag` module
- rename seedfarmer project name to `aiops`
Expand Down
16 changes: 8 additions & 8 deletions modules/sagemaker/sagemaker-custom-kernel/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,18 @@ This module builds custom kernel for SageMaker studio from a Dockerfile.

#### Required

- `ecr_repo_name`: Name of the ECR repo for the image.
- `studio_domain_id`: SageMaker studio domain to attach the kernel to.
- `studio_domain_name`: SageMaker studio name to attach the kernel to.
- `sagemaker_image_name`: Name of the sagemaker image. This variable is also used to find the Dockerfile. The docker build script will be looking for file inside `modules/mlops/custom-kernel/docker/{sagemaker_image_name}`. 1 Dockerfile is added already: `pytorch-10`.
- `ecr-repo-name`: Name of the ECR repo for the image.
- `studio-domain-id`: SageMaker studio domain to attach the kernel to.
- `studio-domain-name`: SageMaker studio name to attach the kernel to.
- `sagemaker-image-name`: Name of the SageMaker image. This value is also used to locate the Dockerfile: the Docker build script looks for it inside `modules/mlops/custom-kernel/docker/{sagemaker_image_name}`. One Dockerfile is already included: `pytorch-10`.
- `studio-execution-role-arn`: SageMaker Studio Domain execution role. Required to associate custom kernel with SageMaker Studio Domain.

#### Optional

- `app_image_config_name`: Name of the app image config. Defaults to `idf-{deployment_name}-app-config`
- `kernel_user_uuid`: Default Unix User ID, defaults to: 1000
- `kernel_user_guid`: Default Unix Group ID, defaults to 100
- `kernel_user_mount_path`: # Path to mount in SageMaker Studio, defaults to `/home/sagemaker-user`
- `app-image-config-name`: Name of the app image config. Defaults to `{project_name}-{deployment_name}-{module_name}-app-config`.
- `kernel-user-uid`: Default Unix user ID. Defaults to `1000`.
- `kernel-user-gid`: Default Unix group ID. Defaults to `100`.
- `kernel-user-home-mount-path`: Path to mount in SageMaker Studio. Defaults to `/home/sagemaker-user`.

### Module Metadata Outputs

Expand Down
75 changes: 31 additions & 44 deletions modules/sagemaker/sagemaker-custom-kernel/app.py
Original file line number Diff line number Diff line change
@@ -1,72 +1,55 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

import os

import aws_cdk as cdk
import cdk_nag
from aws_cdk import CfnOutput
from pydantic import ValidationError

from settings import ApplicationSettings
from stack import CustomKernelStack

project_name = os.getenv("SEEDFARMER_PROJECT_NAME", "")
deployment_name = os.getenv("SEEDFARMER_DEPLOYMENT_NAME", "")
module_name = os.getenv("SEEDFARMER_MODULE_NAME", "")
app_prefix = f"{project_name}-{deployment_name}-{module_name}"

DEFAULT_APP_IMAGE_CONFIG_NAME = f"{app_prefix}-app-config"
DEFAULT_SAGEMAKER_IMAGE_NAME = f"{app_prefix}-echo-kernel"
DEFAULT_CUSTOM_KERNEL_NAME = f"{app_prefix}-echo-kernel"
DEFAULT_USER_UID = 1000
DEFAULT_USER_GID = 100
DEFAULT_KERNEL_USER_HOME_MOUNT_PATH = "/home/sagemaker-user"


def _param(name: str) -> str:
return f"SEEDFARMER_PARAMETER_{name}"


sagemaker_image_name = os.getenv(_param("SAGEMAKER_IMAGE_NAME"), DEFAULT_SAGEMAKER_IMAGE_NAME)
ecr_repo_name = os.getenv(_param("ECR_REPO_NAME")) # type: ignore
app_image_config_name = os.getenv(_param("APP_IMAGE_CONFIG_NAME"), DEFAULT_APP_IMAGE_CONFIG_NAME)
custom_kernel_name = os.getenv(_param("CUSTOM_KERNEL_NAME"), DEFAULT_CUSTOM_KERNEL_NAME)
kernel_user_uid = os.getenv(_param("KERNEL_USER_UID"), DEFAULT_USER_UID)
kernel_user_gid = os.getenv(_param("KERNEL_USER_GID"), DEFAULT_USER_GID)
mount_path = os.getenv(_param("KERNEL_USER_HOME_MOUNT_PATH"), DEFAULT_KERNEL_USER_HOME_MOUNT_PATH)
sm_studio_domain_id = os.getenv(_param("STUDIO_DOMAIN_ID"))
sm_studio_domain_name = os.getenv(_param("STUDIO_DOMAIN_NAME"))


if not ecr_repo_name:
raise Exception("Missing input parameter ecr-repo-name")
app = cdk.App()

try:
app_settings = ApplicationSettings()
except ValidationError as e:
print(e)
raise e

environment = cdk.Environment(
account=os.environ["CDK_DEFAULT_ACCOUNT"],
region=os.environ["CDK_DEFAULT_REGION"],
app_image_config_name = (
app_settings.module_settings.app_image_config_name or f"{app_settings.seedfarmer_settings.app_prefix}-app-config"
)
sagemaker_image_name = (
app_settings.module_settings.sagemaker_image_name or f"{app_settings.seedfarmer_settings.app_prefix}-echo-kernel"
)
custom_kernel_name = (
app_settings.module_settings.custom_kernel_name or f"{app_settings.seedfarmer_settings.app_prefix}-echo-kernel"
)

app = cdk.App()
stack = CustomKernelStack(
scope=app,
construct_id=app_prefix,
app_prefix=app_prefix,
env=environment,
construct_id=app_settings.seedfarmer_settings.app_prefix,
sagemaker_image_name=sagemaker_image_name,
ecr_repo_name=ecr_repo_name,
ecr_repo_name=app_settings.module_settings.ecr_repo_name,
app_image_config_name=app_image_config_name,
custom_kernel_name=custom_kernel_name,
kernel_user_uid=int(kernel_user_uid),
kernel_user_gid=int(kernel_user_gid),
mount_path=mount_path,
kernel_user_uid=app_settings.module_settings.kernel_user_uid,
kernel_user_gid=app_settings.module_settings.kernel_user_gid,
mount_path=app_settings.module_settings.kernel_user_home_mount_path,
env=cdk.Environment(
account=app_settings.cdk_settings.account,
region=app_settings.cdk_settings.region,
),
tags=app_settings.module_settings.tags,
)

CfnOutput(
scope=stack,
id="metadata",
value=stack.to_json_string(
{
"ECRRepositoryName": ecr_repo_name,
"ECRRepositoryName": app_settings.module_settings.ecr_repo_name,
"CustomKernelImageName": sagemaker_image_name,
"CustomKernelImageURI": stack.image_uri,
"AppImageConfigName": app_image_config_name,
Expand All @@ -77,4 +60,8 @@ def _param(name: str) -> str:

cdk.Aspects.of(app).add(cdk_nag.AwsSolutionsChecks(log_ignores=True))

cdk.Tags.of(app).add("SeedFarmerDeploymentName", app_settings.seedfarmer_settings.deployment_name)
cdk.Tags.of(app).add("SeedFarmerModuleName", app_settings.seedfarmer_settings.module_name)
cdk.Tags.of(app).add("SeedFarmerProjectName", app_settings.seedfarmer_settings.project_name)

app.synth()
3 changes: 3 additions & 0 deletions modules/sagemaker/sagemaker-custom-kernel/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ disallow_untyped_decorators = false
exclude = "codeseeder.out/|example/|tests/|scripts/"
warn_unused_ignores = false

plugins = [
"pydantic.mypy"
]

[tool.pytest.ini_options]
addopts = "-v --cov=. --cov-report term"
Expand Down
4 changes: 3 additions & 1 deletion modules/sagemaker/sagemaker-custom-kernel/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
aws-cdk-lib==2.135.0
constructs>=10.0.0,<11.0.0
cdk-nag==2.12.29
cdk-ecr-deployment==3.0.43
cdk-ecr-deployment==3.0.43
pydantic==2.7.2
pydantic-settings==2.2.1
83 changes: 83 additions & 0 deletions modules/sagemaker/sagemaker-custom-kernel/settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""Defines the stack settings."""

from abc import ABC
from typing import Dict, Optional

from pydantic import Field, computed_field
from pydantic_settings import BaseSettings, SettingsConfigDict


class EnvBaseSettings(BaseSettings, ABC):
    """Shared base class for the environment-backed settings models in this file.

    Concrete settings classes inherit this configuration and add their own
    ``env_prefix`` on top of it.
    """

    model_config = SettingsConfigDict(
        # Environment variable names are matched without regard to case.
        case_sensitive=False,
        # Delimiter used to address nested fields from a flat env var name.
        env_nested_delimiter="__",
        # Empty tuple: no field-name prefixes are treated as protected.
        protected_namespaces=(),
        # Inputs that do not correspond to a declared field are ignored.
        extra="ignore",
        # Allow fields to be populated by their own name, not only by alias.
        populate_by_name=True,
    )


class ModuleSettings(EnvBaseSettings):
    """Module parameters passed in by SeedFarmer.

    Every field is looked up under the ``SEEDFARMER_PARAMETER_`` environment
    variable prefix. Fields without a default are mandatory; validation fails
    when they are absent.
    """

    model_config = SettingsConfigDict(env_prefix="SEEDFARMER_PARAMETER_")

    # --- required parameters ---
    studio_domain_id: str
    studio_domain_name: str
    studio_execution_role_arn: str
    ecr_repo_name: str

    # --- optional resource names (None when not supplied) ---
    sagemaker_image_name: Optional[str] = None
    app_image_config_name: Optional[str] = None
    custom_kernel_name: Optional[str] = None

    # --- kernel user defaults ---
    kernel_user_uid: int = 1000
    kernel_user_gid: int = 100
    kernel_user_home_mount_path: str = "/home/sagemaker-user"

    # --- optional string-to-string tag mapping ---
    tags: Optional[Dict[str, str]] = None


class SeedFarmerSettings(EnvBaseSettings):
    """Deployment context read from ``SEEDFARMER_``-prefixed environment variables.

    All three names default to the empty string when the variable is not set.
    """

    model_config = SettingsConfigDict(env_prefix="SEEDFARMER_")

    project_name: str = ""
    deployment_name: str = ""
    module_name: str = ""

    @computed_field  # type: ignore
    @property
    def app_prefix(self) -> str:
        """Return the project, deployment and module names joined by hyphens."""
        return f"{self.project_name}-{self.deployment_name}-{self.module_name}"


class CDKSettings(EnvBaseSettings):
    """Target AWS environment read from ``CDK_DEFAULT_``-prefixed variables.

    Both fields are required; there are no defaults.
    """

    model_config = SettingsConfigDict(env_prefix="CDK_DEFAULT_")

    # AWS account identifier, kept as a string.
    account: str
    # AWS region name.
    region: str


class ApplicationSettings(EnvBaseSettings):
    """Aggregate of every settings group the application needs.

    Each group is created through ``default_factory``, i.e. by calling the
    group's class with no arguments, so each one resolves its own values
    using its own ``env_prefix``.
    """

    # SeedFarmer project/deployment/module names.
    seedfarmer_settings: SeedFarmerSettings = Field(default_factory=SeedFarmerSettings)
    # Module input parameters.
    module_settings: ModuleSettings = Field(default_factory=ModuleSettings)
    # CDK account/region.
    cdk_settings: CDKSettings = Field(default_factory=CDKSettings)
20 changes: 0 additions & 20 deletions modules/sagemaker/sagemaker-custom-kernel/setup.cfg

This file was deleted.

7 changes: 2 additions & 5 deletions modules/sagemaker/sagemaker-custom-kernel/stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
from typing import Any

from aws_cdk import Stack, Tags
from aws_cdk import Stack
from aws_cdk import aws_ecr as ecr
from aws_cdk import aws_iam as iam
from aws_cdk import aws_sagemaker as sagemaker
Expand All @@ -19,7 +19,6 @@ def __init__(
self,
scope: Construct,
construct_id: str,
app_prefix: str,
sagemaker_image_name: str,
ecr_repo_name: str,
app_image_config_name: str,
Expand All @@ -31,10 +30,8 @@ def __init__(
) -> None:
super().__init__(scope, construct_id, **kwargs)

Tags.of(self).add(key="Deployment", value=app_prefix[:64])

# ECR Image deployment
repo = ecr.Repository.from_repository_name(self, id=f"{app_prefix}-ecr-repo", repository_name=ecr_repo_name)
# Derive the imported repository's construct ID from this stack's construct_id.
# A bare `id` here is the Python builtin function (there is no `id` parameter),
# which would render as "<built-in function id>-ecr-repo".
repo = ecr.Repository.from_repository_name(
    self,
    id=f"{construct_id}-ecr-repo",
    repository_name=ecr_repo_name,
)

local_image = DockerImageAsset(
self,
Expand Down
43 changes: 27 additions & 16 deletions modules/sagemaker/sagemaker-custom-kernel/tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,30 +3,41 @@

import os
import sys
from unittest import mock

import pytest
from pydantic import ValidationError


@pytest.fixture(scope="function")
@pytest.fixture(scope="function", autouse=True)
def stack_defaults():
os.environ["SEEDFARMER_PROJECT_NAME"] = "test-project"
os.environ["SEEDFARMER_DEPLOYMENT_NAME"] = "test-deployment"
os.environ["SEEDFARMER_MODULE_NAME"] = "test-module"
os.environ["CDK_DEFAULT_ACCOUNT"] = "111111111111"
os.environ["CDK_DEFAULT_REGION"] = "us-east-1"
os.environ["SEEDFARMER_PARAMETER_SAGEMAKER_IMAGE_NAME"] = "echo-kernel"
os.environ["SEEDFARMER_PARAMETER_ECR_REPO_NAME"] = "repo"
# Unload the app import so that subsequent tests don't reuse
if "app" in sys.modules:
del sys.modules["app"]


def test_app(stack_defaults):
with mock.patch.dict(os.environ, {}, clear=True):
os.environ["SEEDFARMER_PROJECT_NAME"] = "test-project"
os.environ["SEEDFARMER_DEPLOYMENT_NAME"] = "test-deployment"
os.environ["SEEDFARMER_MODULE_NAME"] = "test-module"

os.environ["CDK_DEFAULT_ACCOUNT"] = "111111111111"
os.environ["CDK_DEFAULT_REGION"] = "us-east-1"

os.environ["SEEDFARMER_PARAMETER_SAGEMAKER_IMAGE_NAME"] = "echo-kernel"
os.environ["SEEDFARMER_PARAMETER_STUDIO_DOMAIN_NAME"] = "test-studio"
os.environ["SEEDFARMER_PARAMETER_STUDIO_DOMAIN_ID"] = "studio-id"
os.environ["SEEDFARMER_PARAMETER_STUDIO_EXECUTION_ROLE_ARN"] = "user-arn"
os.environ["SEEDFARMER_PARAMETER_ECR_REPO_NAME"] = "repo"

# Unload the app import so that subsequent tests don't reuse
if "app" in sys.modules:
del sys.modules["app"]

yield


def test_app():
import app # noqa: F401


def test_sagemaker_ecr_repo_name(stack_defaults):
def test_sagemaker_ecr_repo_name():
del os.environ["SEEDFARMER_PARAMETER_ECR_REPO_NAME"]

with pytest.raises(Exception):
with pytest.raises(ValidationError):
import app # noqa: F401
21 changes: 3 additions & 18 deletions modules/sagemaker/sagemaker-custom-kernel/tests/test_stack.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,14 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

import os
import sys

import aws_cdk as cdk
import cdk_nag
import pytest
from aws_cdk.assertions import Annotations, Match, Template


@pytest.fixture(scope="function")
def stack_defaults():
os.environ["CDK_DEFAULT_ACCOUNT"] = "111111111111"
os.environ["CDK_DEFAULT_REGION"] = "us-east-1"

# Unload the app import so that subsequent tests don't reuse

if "stack" in sys.modules:
del sys.modules["stack"]


@pytest.fixture(scope="function")
def stack(stack_defaults) -> cdk.Stack:
def stack() -> cdk.Stack:
import stack

app = cdk.App()
Expand All @@ -42,10 +28,9 @@ def stack(stack_defaults) -> cdk.Stack:
app,
app_prefix,
env=cdk.Environment(
account=os.environ["CDK_DEFAULT_ACCOUNT"],
region=os.environ["CDK_DEFAULT_REGION"],
account="111111111111",
region="us-east-1",
),
app_prefix=app_prefix,
sagemaker_image_name=sagemaker_image_name,
ecr_repo_name=ecr_repo_name,
app_image_config_name=app_image_config_name,
Expand Down

0 comments on commit c9f3ee7

Please sign in to comment.