Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

reduce input variants to only _used_ input variants #1968

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
105 changes: 87 additions & 18 deletions conda_smithy/configure_feedstock.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from conda.models.match_spec import MatchSpec
from conda.models.version import VersionOrder
from conda_build import __version__ as conda_build_version
from conda_build.metadata import get_selectors
from conda_build.metadata import MetaData, get_selectors
from jinja2 import FileSystemLoader
from jinja2.sandbox import SandboxedEnvironment
from rattler_build_conda_compat.loader import parse_recipe_config_file
Expand Down Expand Up @@ -90,6 +90,24 @@
os.environ.get("CONDA_FORGE_PINNING_LIFETIME", 15 * 60)
)

ALWAYS_KEEP_KEYS = {
"zip_keys",
"pin_run_as_build",
"MACOSX_DEPLOYMENT_TARGET",
"MACOSX_SDK_VERSION",
"macos_min_version",
"macos_machine",
"channel_sources",
"channel_targets",
"docker_image",
"build_number_decrement",
# The following keys are required for some of our aarch64 builds
# Added in https://github.com/conda-forge/conda-forge-pinning-feedstock/pull/180
"cdt_arch",
"cdt_name",
"BUILD",
}


# use lru_cache to avoid repeating warnings endlessly;
# this keeps track of 10 different messages and then warns again
Expand Down Expand Up @@ -585,23 +603,7 @@ def _collapse_subpackage_variants(
logger.debug("preserve_top_level_loops %s", preserve_top_level_loops)

# Add in some variables that should always be preserved
always_keep_keys = {
"zip_keys",
"pin_run_as_build",
"MACOSX_DEPLOYMENT_TARGET",
"MACOSX_SDK_VERSION",
"macos_min_version",
"macos_machine",
"channel_sources",
"channel_targets",
"docker_image",
"build_number_decrement",
# The following keys are required for some of our aarch64 builds
# Added in https://github.com/conda-forge/conda-forge-pinning-feedstock/pull/180
"cdt_arch",
"cdt_name",
"BUILD",
}
always_keep_keys = set() | ALWAYS_KEEP_KEYS

if not is_noarch:
always_keep_keys.add("target_platform")
Expand Down Expand Up @@ -855,6 +857,65 @@ def migrate_combined_spec(combined_spec, forge_dir, config, forge_config):
return combined_spec


def reduce_variants(recipe_path, config, input_variants):
"""Subset of render_recipe to compute reduced variant matrix

large numbers of unused variants greatly increase render time
"""

# from render_recipe
with conda_build.render.open_recipe(recipe_path) as recipe:
metadata = MetaData(str(recipe), config=config)

# from distribute_variants
# explode variants dict to list of variants
variants = initial_variants = conda_build.variants.get_package_variants(
metadata, variants=input_variants
)
logger.debug("Starting with %s input variants", len(initial_variants))
metadata.config.variant = variants[0]
metadata.config.variants = variants

# force_global finds variables in the whole recipe, not just a single output.
# Without this, dependencies won't be found for multi-output recipes
# This may not be comprehensive!
all_used = metadata.get_used_vars(force_global=True)
beckermr marked this conversation as resolved.
Show resolved Hide resolved
# conservatively, find all used vars in all scripts
# we can't use `output.script` because these aren't computed yet
# bonus: this will find output.build.script, which conda-build does not.
for script in Path(recipe_path).glob("*.sh"):
all_used.update(
conda_build.variants.find_used_variables_in_shell_script(
metadata.config.variant, script
)
)
for script in Path(recipe_path).glob("*.bat"):
all_used.update(
conda_build.variants.find_used_variables_in_batch_script(
metadata.config.variant, script
)
)

# trim unused dimensions, these cost a lot in render_recipe!
# because `get_used_vars` above _may_ not catch all possible used variables,
# only trim unused variables that increase dimensionality.
all_used.update(ALWAYS_KEEP_KEYS)
all_used.update(
{"target_platform", "extend_keys", "ignore_build_only_deps"}
)
new_input_variants = input_variants.copy()
for key, value in input_variants.items():
# only consider keys that increase render dimensionality for trimming at this stage
# so we don't have to trust all_used to find _everything_
if len(value) > 1 and key not in all_used:
beckermr marked this conversation as resolved.
Show resolved Hide resolved
logger.debug("Trimming unused dimension: %s", key)
new_input_variants.pop(key)
_trim_unused_zip_keys(new_input_variants)
# new_variants = metadata.get_reduced_variant_set(all_used)
# logger.info(f"Rendering with {len(new_variants)} input variants")
return new_input_variants


def _conda_build_api_render_for_smithy(
recipe_path,
config=None,
Expand Down Expand Up @@ -887,6 +948,12 @@ def _conda_build_api_render_for_smithy(

config = get_or_merge_config(config, **kwargs)

# reduce unused variants first, they get very expensive in render_recipe
if variants:
variants = reduce_variants(
recipe_path, config=config, input_variants=variants
)

metadata_tuples = render_recipe(
recipe_path,
bypass_env_check=bypass_env_check,
Expand All @@ -896,6 +963,7 @@ def _conda_build_api_render_for_smithy(
permit_unsatisfiable_variants=permit_unsatisfiable_variants,
)
output_metas = []
# reduce input variant set to those that are actually used
for meta, download, render_in_env in metadata_tuples:
if not meta.skip() or not config.trim_skip:
for od, om in meta.get_output_metadata_set(
Expand Down Expand Up @@ -1064,6 +1132,7 @@ def _render_ci_provider(
# CBC yaml files where variants in the migrators are not in the CBC.
# Thus we move it out of the way.
# TODO: upstream this as a flag in conda-build
logger.info("Getting variants for %s-%s", platform, arch)
try:
_recipe_cbc = os.path.join(
forge_dir,
Expand Down
3 changes: 3 additions & 0 deletions news/1968_input_variants.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
**Fixed:**

* Improved render time for large numbers of variants
2 changes: 2 additions & 0 deletions tests/recipes/multiple_outputs/build-output1.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
echo %script_all%
echo %script_bat%
1 change: 1 addition & 0 deletions tests/recipes/multiple_outputs/build-output1.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
echo "${script_all}"
2 changes: 2 additions & 0 deletions tests/recipes/multiple_outputs/build-output2.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
echo %script_all%
echo %script_bat%
2 changes: 2 additions & 0 deletions tests/recipes/multiple_outputs/build-output2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
echo "${script_all}"
echo "${script_only_2}"
16 changes: 16 additions & 0 deletions tests/recipes/multiple_outputs/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,22 @@ zlib:
nonexistent:
- a
- b
# not used; should not be picked up
unused:
- a
- b
# used in all output scripts
# single value
script_all:
- a
# used in output2 sh script
script_only_2:
- a2
- b2
# used in both outputs, only windows
script_bat:
- a2
- b2
# zip these just for testing purposes
zip_keys:
-
Expand Down
4 changes: 4 additions & 0 deletions tests/recipes/multiple_outputs/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,17 @@ requirements:

outputs:
- name: test_output_1
script: build-output1.sh # [not win]
script: build-output1.bat # [win]
requirements:
- bzip2
test:
commands:
- test -e $PREFIX/lib/libz.so # [linux]
- test -e $PREFIX/lib/libz.dylib # [osx]
- name: test_output_2
script: build-output2.sh # [not win]
script: build-output2.bat # [win]
requirements:
host:
- jpeg
Expand Down
37 changes: 34 additions & 3 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import shutil
import subprocess
from pathlib import Path
from textwrap import dedent

import pytest
Expand Down Expand Up @@ -114,19 +115,23 @@ def test_init_multiple_output_matrix(testing_workdir):
temporary_directory=os.path.join(recipe, "temp"),
)
regen_obj(args)
matrix_dir = os.path.join(feedstock_dir, ".ci_support")
# the matrix should be consolidated among all outputs, as well as the top-level
# reqs. Only the top-level reqs should have indedependent config files,
# though - loops within outputs are contained in those top-level configs.
matrix_dir_len = len(os.listdir(matrix_dir))
assert matrix_dir_len == 13
matrix_dir = Path(feedstock_dir) / ".ci_support"
matrix_files = list(matrix_dir.glob("*.yaml"))
linux_libpng16 = matrix_dir / "linux_64_libpng1.6libpq9.5.yaml"
assert linux_libpng16 in matrix_files
assert len(matrix_files) == 16
linux_libpng16 = os.path.join(
matrix_dir, "linux_64_libpng1.6libpq9.5.yaml"
)
assert os.path.isfile(linux_libpng16)
with open(linux_libpng16) as f:
config = yaml.safe_load(f)
assert "libpng" in config
assert config["libpng"] == ["1.6"]
assert "libpq" in config
assert config["libpq"] == ["9.5"]
# this is a zipped key, but it's not used, so it shouldn't show up
assert "libtiff" not in config
Expand All @@ -139,6 +144,32 @@ def test_init_multiple_output_matrix(testing_workdir):
# not show up in the final configs.
assert "zlib" not in config

# script vars are present
assert "script_all" in config
assert config["script_all"] == ["a"]
assert "script_only_2" in config
assert config["script_only_2"] == ["a2", "b2"]

# windows script_bat shouldn't be in config
assert "script_bat" not in config

# check windows, which has additional axis
win_config = matrix_dir / "win_64_libpng1.5libpq9.6script_bata2.yaml"
assert win_config in matrix_files
with win_config.open() as f:
config = yaml.safe_load(f)

# script vars are present
assert "script_all" in config
assert config["script_all"] == ["a"]

# sh-only var not in windows config
assert "script_only_2" not in config

# windows script_bat should be in config
assert "script_bat" in config
assert config["script_bat"] == ["a2"]


@pytest.mark.parametrize(
"dirname", ["multiple_outputs", "multiple_outputs2", "multiple_outputs3"]
Expand Down
Loading