Skip to content

Commit

Permalink
Add support for packaging python AWS Lambda layers (#19123)
Browse files Browse the repository at this point in the history
This fixes #18880 by making pants able to create an AWS Lambda package
with the layout expected by a "Layer"
(https://docs.aws.amazon.com/lambda/latest/dg/configuration-layers.html).
For Python, this is the same as a normal Lambda, except within a `python/` directory:
`python/cowsay/__init__.py` will be importable via `import cowsay`.

The big win here is easily separating requirements from sources:

```python
# path/to/BUILD
python_sources()

python_awslambda(
  name="lambda",
  entry_point="./foo.py:handler",
  include_requirements=False
)
python_aws_lambda_layer(
  name="layer",
  dependencies=["./foo.py"],
  include_requirements=True,
  include_sources=False,
)
```

Packaging that will result in `path.to/lambda.zip` that contains only
first-party sources, and `path.to/layer.zip` that contains only
third-party requirements. This results in faster builds, less cache
usage, and smaller deploy packages when only changing first-party
sources. This side-steps some of the slowness in #19076. For the example
in that PR:

| metric                        | Lambdex  | zip, no layers | using zip + layers (this PR)   |
|-------------------------------|----------|----------------|--------------------------------|
| init time on cold start       | 2.3-2.5s | 1.3-1.4s       | not yet tested                 |
| compressed size               | 24.6MB   | 23.8MB         | 28KB (lambda), 23.8MB (layer)  |
| uncompressed size             | 117.8MB  | 115.8MB        | 72KB (lambda), 115.7MB (layer) |
| PEX-construction build time   | ~5s      | ~5s            | ~1.5s (lambda), ~5s (layer)    |
| PEX-postprocessing build time | 0.14s    | 4.8s           | 1s (lambda), ~5s (layer)       |

That is, the first-party-only lambda package is ~1000× smaller, and is ~2×
faster to build than even the Lambdex version.

This uses a separate target, `python_aws_lambda_layer`. The target has
its inputs configured using the `dependencies=[...]` field. For
instance, the example above is saying create a layer using all of the
third-party requirements required (transitively) by `./foo.py`, and none
of the first-party sources.

(The initial implementation, bdbc1cb,
just added a `layout="layer"` option to the existing `python_awslambda`
target, but, per the discussion in this PR, that was deemed
unnecessarily confusing, e.g. it'd keep the `handler` field around,
which is meaningless for a layer.)

Follow-up not handled here:

- for 2.18:
  - documentation
  - renaming the `python_awslambda` target to `python_aws_lambda_function`
     to be clearer (NB. I'm also proposing taking the chance to add an
     underscore in the name, which I've done with the
     `python_aws_lambda_layer` target. Let me know if there's a strong reason
     for the `awslambda` name without an underscore. I note the GCF target is
    `python_google_cloud_function`)
- not necessarily for 2.18:
  - potentially, adding "sizzle" like the ability to have
     `python_aws_lambda_function` specify which layers it will be used with,
     and thus automatically exclude the contents of the layer from the
     function (e.g. the example above could hypothetically replace
     `include_requirements=False` with `layers=[":layer"]`)
  • Loading branch information
huonw authored May 30, 2023
1 parent 9e95a8e commit 318c853
Show file tree
Hide file tree
Showing 7 changed files with 287 additions and 44 deletions.
70 changes: 66 additions & 4 deletions src/python/pants/backend/awslambda/python/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
PythonAWSLambda,
PythonAwsLambdaHandlerField,
PythonAwsLambdaIncludeRequirements,
PythonAwsLambdaIncludeSources,
PythonAWSLambdaLayer,
PythonAwsLambdaLayerDependenciesField,
PythonAwsLambdaRuntime,
)
from pants.backend.python.subsystems.lambdex import Lambdex, LambdexLayout
Expand All @@ -22,24 +25,38 @@
from pants.core.goals.package import BuiltPackage, OutputPathField, PackageFieldSet
from pants.core.util_rules.environments import EnvironmentField
from pants.engine.rules import Get, collect_rules, rule
from pants.engine.target import InvalidTargetException
from pants.engine.unions import UnionRule
from pants.util.logging import LogLevel
from pants.util.strutil import softwrap

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class PythonAwsLambdaFieldSet(PackageFieldSet):
required_fields = (PythonAwsLambdaHandlerField,)

handler: PythonAwsLambdaHandlerField
class _BaseFieldSet(PackageFieldSet):
include_requirements: PythonAwsLambdaIncludeRequirements
runtime: PythonAwsLambdaRuntime
complete_platforms: PythonFaaSCompletePlatforms
output_path: OutputPathField
environment: EnvironmentField


# Field set used to package a Lambda *function* target: the fields shared via
# `_BaseFieldSet` plus the function's handler.
@dataclass(frozen=True)
class PythonAwsLambdaFieldSet(_BaseFieldSet):
# The handler field is what marks a target as applicable to this field set.
required_fields = (PythonAwsLambdaHandlerField,)

# Handler of the function; passed through as `handler=` when building the package.
handler: PythonAwsLambdaHandlerField


# Field set used to package a Lambda *layer* target: the fields shared via
# `_BaseFieldSet` plus the layer-specific dependencies and `include_sources` fields.
# A layer has no handler field.
@dataclass(frozen=True)
class PythonAwsLambdaLayerFieldSet(_BaseFieldSet):
# The layer-specific dependencies field is what marks a target as applicable here.
required_fields = (PythonAwsLambdaLayerDependenciesField,)

# What the layer pulls in; a layer's contents come only from its dependencies.
dependencies: PythonAwsLambdaLayerDependenciesField
# Whether first-party sources are placed in the layer artifact.
include_sources: PythonAwsLambdaIncludeSources


@rule(desc="Create Python AWS Lambda", level=LogLevel.DEBUG)
async def package_python_awslambda(
field_set: PythonAwsLambdaFieldSet,
Expand Down Expand Up @@ -74,11 +91,56 @@ async def package_python_awslambda(
handler=field_set.handler,
output_path=field_set.output_path,
include_requirements=field_set.include_requirements.value,
include_sources=True,
reexported_handler_module=PythonAwsLambdaHandlerField.reexported_handler_module,
),
)


# Package a `python_aws_lambda_layer` target into a `BuiltPackage`.
#
# Raises `InvalidTargetException` when `[lambdex].layout` is the old Lambdex layout;
# otherwise delegates to the shared FaaS build via `BuildPythonFaaSRequest`, with all
# content placed under the `python` prefix and no handler.
@rule(desc="Create Python AWS Lambda Layer", level=LogLevel.DEBUG)
async def package_python_aws_lambda_layer(
field_set: PythonAwsLambdaLayerFieldSet,
lambdex: Lambdex,
) -> BuiltPackage:
# Layers are rejected under the old Lambdex layout; the message tells the user to
# switch the layout option or remove the target.
if lambdex.layout is LambdexLayout.LAMBDEX:
raise InvalidTargetException(
softwrap(
f"""
the `{PythonAWSLambdaLayer.alias}` target {field_set.address} cannot be used with
the old Lambdex layout (`[lambdex].layout = \"{LambdexLayout.LAMBDEX.value}\"` in
`pants.toml`), set that to `{LambdexLayout.ZIP.value}` or remove this target
"""
)
)

return await Get(
BuiltPackage,
BuildPythonFaaSRequest(
address=field_set.address,
target_name=PythonAWSLambdaLayer.alias,
complete_platforms=field_set.complete_platforms,
runtime=field_set.runtime,
output_path=field_set.output_path,
# Both inclusion switches come from the target's fields.
include_requirements=field_set.include_requirements.value,
include_sources=field_set.include_sources.value,
# See
# https://docs.aws.amazon.com/lambda/latest/dg/configuration-layers.html#configuration-layers-path
#
# Runtime | Path
# ...
# Python | `python`
# | `python/lib/python3.10/site-packages`
# ...
#
# The path that is independent of the runtime version is more convenient:
prefix_in_artifact="python",
# a layer doesn't have a handler, just pulls in things via `dependencies`
handler=None,
reexported_handler_module=None,
),
)


def rules():
return [
*collect_rules(),
Expand Down
97 changes: 94 additions & 3 deletions src/python/pants/backend/awslambda/python/rules_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,12 @@

import pytest

from pants.backend.awslambda.python.rules import PythonAwsLambdaFieldSet
from pants.backend.awslambda.python.rules import (
PythonAwsLambdaFieldSet,
PythonAwsLambdaLayerFieldSet,
)
from pants.backend.awslambda.python.rules import rules as awslambda_python_rules
from pants.backend.awslambda.python.target_types import PythonAWSLambda
from pants.backend.awslambda.python.target_types import PythonAWSLambda, PythonAWSLambdaLayer
from pants.backend.awslambda.python.target_types import rules as target_rules
from pants.backend.python.goals import package_pex_binary
from pants.backend.python.goals.package_pex_binary import PexBinaryFieldSet
Expand All @@ -37,6 +40,8 @@
from pants.core.target_types import rules as core_target_types_rules
from pants.engine.addresses import Address
from pants.engine.fs import DigestContents
from pants.engine.internals.scheduler import ExecutionError
from pants.engine.target import FieldSet
from pants.testutil.python_interpreter_selection import all_major_minor_python_versions
from pants.testutil.python_rule_runner import PythonRuleRunner
from pants.testutil.rule_runner import QueryRule
Expand All @@ -54,12 +59,14 @@ def rule_runner() -> PythonRuleRunner:
*target_rules(),
*package.rules(),
QueryRule(BuiltPackage, (PythonAwsLambdaFieldSet,)),
QueryRule(BuiltPackage, (PythonAwsLambdaLayerFieldSet,)),
],
target_types=[
FileTarget,
FilesGeneratorTarget,
PexBinary,
PythonAWSLambda,
PythonAWSLambdaLayer,
PythonRequirementTarget,
PythonRequirementTarget,
PythonSourcesGeneratorTarget,
Expand All @@ -77,13 +84,18 @@ def create_python_awslambda(
*,
expected_extra_log_lines: tuple[str, ...],
extra_args: list[str] | None = None,
layer: bool = False,
) -> tuple[str, bytes]:
rule_runner.set_options(
["--source-root-patterns=src/python", *(extra_args or ())],
env_inherit={"PATH", "PYENV_ROOT", "HOME"},
)
target = rule_runner.get_target(addr)
built_asset = rule_runner.request(BuiltPackage, [PythonAwsLambdaFieldSet.create(target)])
if layer:
field_set: type[FieldSet] = PythonAwsLambdaLayerFieldSet
else:
field_set = PythonAwsLambdaFieldSet
built_asset = rule_runner.request(BuiltPackage, [field_set.create(target)])
assert expected_extra_log_lines == built_asset.artifacts[0].extra_log_lines
digest_contents = rule_runner.request(DigestContents, [built_asset.digest])
assert len(digest_contents) == 1
Expand Down Expand Up @@ -328,3 +340,82 @@ def handler(event, context):
assert (
zipfile.read("lambda_function.py") == b"from foo.bar.hello_world import handler as handler"
)


# Builds two layer targets from the same source file and inspects the resulting zips:
# `lambda` (default `include_sources`) and `slimlambda` (`include_sources=False`).
def test_create_hello_world_layer(rule_runner: PythonRuleRunner) -> None:
rule_runner.write_files(
{
"src/python/foo/bar/hello_world.py": dedent(
"""
import mureq
def handler(event, context):
print('Hello, World!')
"""
),
"src/python/foo/bar/BUILD": dedent(
"""
python_requirement(name="mureq", requirements=["mureq==0.2"])
python_sources()
python_aws_lambda_layer(
name='lambda',
dependencies=["./hello_world.py"],
runtime="python3.7",
)
python_aws_lambda_layer(
name='slimlambda',
include_sources=False,
dependencies=["./hello_world.py"],
runtime="python3.7",
)
"""
),
}
)

# Layer with sources: both the third-party requirement and the first-party module
# must appear under the `python/` prefix.
zip_file_relpath, content = create_python_awslambda(
rule_runner,
Address("src/python/foo/bar", target_name="lambda"),
expected_extra_log_lines=(),
layer=True,
)
assert "src.python.foo.bar/lambda.zip" == zip_file_relpath

zipfile = ZipFile(BytesIO(content))
names = set(zipfile.namelist())
assert "python/mureq/__init__.py" in names
assert "python/foo/bar/hello_world.py" in names
# nothing that looks like a synthesized handler in any of the names
assert "lambda_function.py" not in " ".join(names)

# Layer without sources: the requirement is still present, the first-party module is not.
zip_file_relpath, content = create_python_awslambda(
rule_runner,
Address("src/python/foo/bar", target_name="slimlambda"),
expected_extra_log_lines=(),
layer=True,
)
assert "src.python.foo.bar/slimlambda.zip" == zip_file_relpath

zipfile = ZipFile(BytesIO(content))
names = set(zipfile.namelist())
assert "python/mureq/__init__.py" in names
assert "python/foo/bar/hello_world.py" not in names
# nothing that looks like a synthesized handler in any of the names
assert "lambda_function.py" not in " ".join(names)


def test_layer_must_have_dependencies(rule_runner: PythonRuleRunner) -> None:
    """Packaging a layer that omits `dependencies` must fail, unlike most other targets."""
    build_file_content = "python_aws_lambda_layer(name='lambda', runtime='python3.7')"
    expected_message = "The 'dependencies' field in target //:lambda must be defined"

    rule_runner.write_files({"BUILD": build_file_content})

    # The missing field is only reported once the target is actually requested.
    with pytest.raises(ExecutionError, match=expected_message):
        create_python_awslambda(
            rule_runner,
            Address("", target_name="lambda"),
            expected_extra_log_lines=(),
            layer=True,
        )
72 changes: 57 additions & 15 deletions src/python/pants/backend/awslambda/python/target_types.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import annotations

import re
from dataclasses import dataclass
from typing import Match, Optional, Tuple, cast
from typing import ClassVar, Match, Optional, Tuple, cast

from pants.backend.python.target_types import PexCompletePlatformsField, PythonResolveField
from pants.backend.python.util_rules.faas import (
Expand All @@ -20,6 +22,7 @@
from pants.engine.target import (
COMMON_TARGET_FIELDS,
BoolField,
Field,
InvalidFieldException,
InvalidTargetException,
Target,
Expand Down Expand Up @@ -63,8 +66,20 @@ class PythonAwsLambdaIncludeRequirements(BoolField):
default = True
help = help_text(
"""
Whether to resolve requirements and include them in the Pex. This is most useful with Lambda
Layers to make code uploads smaller when deps are in layers.
Whether to resolve requirements and include them in the AWS Lambda artifact. This is most useful with Lambda
Layers to make code uploads smaller when third-party requirements are in layers.
https://docs.aws.amazon.com/lambda/latest/dg/configuration-layers.html
"""
)


# Boolean `include_sources` target field, enabled by default. The layer packaging rule
# forwards its value as `include_sources=` on the build request.
class PythonAwsLambdaIncludeSources(BoolField):
alias = "include_sources"
default = True
help = help_text(
"""
Whether to resolve first party sources and include them in the AWS Lambda artifact. This is
most useful to allow creating a Lambda Layer with only third-party requirements.
https://docs.aws.amazon.com/lambda/latest/dg/configuration-layers.html
"""
)
Expand Down Expand Up @@ -109,26 +124,20 @@ def to_interpreter_version(self) -> Optional[Tuple[int, int]]:
return int(mo.group("major")), int(mo.group("minor"))


class PythonAWSLambda(Target):
alias = "python_awslambda"
core_fields = (
# Dependencies field for layer targets. It differs from `PythonFaaSDependencies` only in
# being mandatory: a layer's contents come solely from its dependencies.
class PythonAwsLambdaLayerDependenciesField(PythonFaaSDependencies):
required = True


class _AWSLambdaBaseTarget(Target):
core_fields: ClassVar[tuple[type[Field], ...]] = (
*COMMON_TARGET_FIELDS,
OutputPathField,
PythonFaaSDependencies,
PythonAwsLambdaHandlerField,
PythonAwsLambdaIncludeRequirements,
PythonAwsLambdaRuntime,
PythonFaaSCompletePlatforms,
PythonResolveField,
EnvironmentField,
)
help = help_text(
f"""
A self-contained Python function suitable for uploading to AWS Lambda.
See {doc_url('awslambda-python')}.
"""
)

def validate(self) -> None:
if self[PythonAwsLambdaRuntime].value is None and not self[PexCompletePlatformsField].value:
Expand All @@ -143,6 +152,39 @@ def validate(self) -> None:
)


# Lambda *function* target: the base target's fields plus the optional-style
# `PythonFaaSDependencies` field and the handler field.
class PythonAWSLambda(_AWSLambdaBaseTarget):
# TODO: rename to python_aws_lambda_function
alias = "python_awslambda"
core_fields = (
*_AWSLambdaBaseTarget.core_fields,
PythonFaaSDependencies,
PythonAwsLambdaHandlerField,
)
help = help_text(
f"""
A self-contained Python function suitable for uploading to AWS Lambda.
See {doc_url('awslambda-python')}.
"""
)


# Lambda *layer* target: the base target's fields plus `include_sources` and the
# layer-specific dependencies field. It has no handler field.
class PythonAWSLambdaLayer(_AWSLambdaBaseTarget):
alias = "python_aws_lambda_layer"
core_fields = (
*_AWSLambdaBaseTarget.core_fields,
PythonAwsLambdaIncludeSources,
PythonAwsLambdaLayerDependenciesField,
)
help = help_text(
f"""
A Python layer suitable for uploading to AWS Lambda.
See {doc_url('awslambda-python')}.
"""
)


def rules():
return (
*collect_rules(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ async def package_python_google_cloud_function(
handler=field_set.handler,
output_path=field_set.output_path,
include_requirements=True,
include_sources=True,
reexported_handler_module=PythonGoogleCloudFunctionHandlerField.reexported_handler_module,
log_only_reexported_handler_func=True,
),
Expand Down
Loading

0 comments on commit 318c853

Please sign in to comment.