-
Notifications
You must be signed in to change notification settings - Fork 442
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix sagemaker orchestrator and step operator env vars and other minor bugs (#1993)
* Support union for flavor schema properties in the CLI
* Fix AWS container registry check for existing repos
* Split large env vars into chunks for the sagemaker orchestrator
* Fix docstring
* Move env var splitting to its own utils module
* Split env vars passed to the sagemaker step operator
* Fix errors
* Add unit tests for env splitting utilities
* Actually adding the unit tests
- Loading branch information
1 parent
80935c8
commit eb32d88
Showing
10 changed files
with
332 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
39 changes: 39 additions & 0 deletions
39
src/zenml/integrations/aws/orchestrators/sagemaker_orchestrator_entrypoint_config.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# Copyright (c) ZenML GmbH 2023. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at: | ||
# | ||
# https://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | ||
# or implied. See the License for the specific language governing | ||
# permissions and limitations under the License. | ||
"""Entrypoint configuration for ZenML Sagemaker pipeline steps.""" | ||
|
||
from zenml.entrypoints.step_entrypoint_configuration import ( | ||
StepEntrypointConfiguration, | ||
) | ||
from zenml.utils.env_utils import reconstruct_environment_variables | ||
|
||
SAGEMAKER_PROCESSOR_STEP_ENV_VAR_SIZE_LIMIT = 256


class SagemakerEntrypointConfiguration(StepEntrypointConfiguration):
    """Step entrypoint that restores chunked environment variables.

    Sagemaker Processor steps only allow environment variable values of up to
    256 characters. Values longer than that are passed to the step as several
    smaller pieces; this entrypoint exists solely to join those pieces back
    together before the regular step entrypoint logic executes.
    """

    def run(self) -> None:
        """Rebuild the chunked environment variables and run the step."""
        # Must happen first: the step itself expects to find the original,
        # un-chunked environment variables.
        reconstruct_environment_variables()

        # Hand over to the regular step entrypoint logic.
        super().run()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
39 changes: 39 additions & 0 deletions
39
src/zenml/integrations/aws/step_operators/sagemaker_step_operator_entrypoint_config.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# Copyright (c) ZenML GmbH 2023. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at: | ||
# | ||
# https://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | ||
# or implied. See the License for the specific language governing | ||
# permissions and limitations under the License. | ||
"""Entrypoint configuration for ZenML Sagemaker step operator.""" | ||
|
||
from zenml.step_operators.step_operator_entrypoint_configuration import ( | ||
StepOperatorEntrypointConfiguration, | ||
) | ||
from zenml.utils.env_utils import reconstruct_environment_variables | ||
|
||
SAGEMAKER_ESTIMATOR_STEP_ENV_VAR_SIZE_LIMIT = 512


class SagemakerEntrypointConfiguration(StepOperatorEntrypointConfiguration):
    """Step operator entrypoint that restores chunked environment variables.

    Sagemaker Estimator steps only allow environment variable values of up to
    512 characters. Values longer than that are passed to the step as several
    smaller pieces; this entrypoint exists solely to join those pieces back
    together before the regular step operator entrypoint logic executes.
    """

    def run(self) -> None:
        """Rebuild the chunked environment variables and run the step."""
        # Must happen first: the step itself expects to find the original,
        # un-chunked environment variables.
        reconstruct_environment_variables()

        # Hand over to the regular step operator entrypoint logic.
        super().run()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
# Copyright (c) ZenML GmbH 2023. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at: | ||
# | ||
# https://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | ||
# or implied. See the License for the specific language governing | ||
# permissions and limitations under the License. | ||
"""Utility functions for handling environment variables.""" | ||
import os | ||
from typing import Dict, List, Optional, cast | ||
|
||
ENV_VAR_CHUNK_SUFFIX = "_CHUNK_"


def split_environment_variables(
    size_limit: int,
    env: Optional[Dict[str, str]] = None,
) -> None:
    """Break up oversized environment variable values into chunks.

    Every variable whose value is longer than `size_limit` is removed and
    replaced by a series of variables named
    `<name><ENV_VAR_CHUNK_SUFFIX><index>`, each holding at most `size_limit`
    characters of the original value. The mapping is updated in-place.

    Args:
        size_limit: Maximum length of an environment variable value.
        env: Input environment variables dictionary. If not supplied, the
            OS environment variables are used.

    Raises:
        RuntimeError: If an environment variable value is too large and
            requires more than 10 chunks.
    """
    target = cast(Dict[str, str], os.environ) if env is None else env

    # Capping the chunk count at 10 keeps the number of generated variables
    # small and guarantees a single-digit chunk index, which keeps the
    # reconstruction simple.
    max_length = size_limit * 10

    # Iterate over a snapshot because the mapping is mutated along the way.
    for name, content in list(target.items()):
        length = len(content)
        if length <= size_limit:
            continue

        if length > max_length:
            raise RuntimeError(
                f"Environment variable {name} exceeds the maximum length of "
                f"{max_length} characters."
            )

        # Replace the oversized variable with its numbered chunks.
        target.pop(name)
        for index, start in enumerate(range(0, length, size_limit)):
            chunk_name = f"{name}{ENV_VAR_CHUNK_SUFFIX}{index}"
            target[chunk_name] = content[start : start + size_limit]
|
||
|
||
def reconstruct_environment_variables(
    env: Optional[Dict[str, str]] = None
) -> None:
    """Reconstruct environment variables that were split into chunks.

    Looks for variables named `<name><ENV_VAR_CHUNK_SUFFIX><digit>`, joins
    the values of all chunks that share the same `<name>` in ascending chunk
    order, stores the result under `<name>` and removes the chunk variables.
    The input environment variables are modified in-place.

    Args:
        env: Input environment variables dictionary. If not supplied, the OS
            environment variables are used.
    """
    if env is None:
        env = cast(Dict[str, str], os.environ)

    # Length of the "<suffix><index>" tail that is stripped from a chunk key
    # to recover the original variable name.
    tail_length = len(ENV_VAR_CHUNK_SUFFIX) + 1

    chunks: Dict[str, List[str]] = {}
    for key in env.keys():
        # Only keys that end in the chunk suffix followed by one ASCII digit
        # are chunks. Requiring the digit prevents unrelated variables such
        # as "FOO_CHUNK_X" from being merged into "FOO" and deleted.
        if (
            not key[:-1].endswith(ENV_VAR_CHUNK_SUFFIX)
            or key[-1] not in "0123456789"
        ):
            continue

        # Collect all chunks of the same environment variable
        chunks.setdefault(key[:-tail_length], []).append(key)

    # Reconstruct the environment variables from their chunks
    for original_key, chunk_keys in chunks.items():
        # The index is a single digit, so a lexicographic sort puts the
        # chunks in their numeric order.
        chunk_keys.sort()
        env[original_key] = "".join(
            env[chunk_key] for chunk_key in chunk_keys
        )

        # Remove the chunk environment variables
        for chunk_key in chunk_keys:
            env.pop(chunk_key)
Oops, something went wrong.