diff --git a/conda_smithy/configure_feedstock.py b/conda_smithy/configure_feedstock.py
index a5215d6fd..fd925b63d 100644
--- a/conda_smithy/configure_feedstock.py
+++ b/conda_smithy/configure_feedstock.py
@@ -37,7 +37,7 @@
 from conda.models.match_spec import MatchSpec
 from conda.models.version import VersionOrder
 from conda_build import __version__ as conda_build_version
-from conda_build.metadata import get_selectors
+from conda_build.metadata import MetaData, get_selectors
 from jinja2 import FileSystemLoader
 from jinja2.sandbox import SandboxedEnvironment
 from rattler_build_conda_compat.loader import parse_recipe_config_file
@@ -90,6 +90,24 @@
     os.environ.get("CONDA_FORGE_PINNING_LIFETIME", 15 * 60)
 )
 
+ALWAYS_KEEP_KEYS = {
+    "zip_keys",
+    "pin_run_as_build",
+    "MACOSX_DEPLOYMENT_TARGET",
+    "MACOSX_SDK_VERSION",
+    "macos_min_version",
+    "macos_machine",
+    "channel_sources",
+    "channel_targets",
+    "docker_image",
+    "build_number_decrement",
+    # The following keys are required for some of our aarch64 builds
+    # Added in https://github.com/conda-forge/conda-forge-pinning-feedstock/pull/180
+    "cdt_arch",
+    "cdt_name",
+    "BUILD",
+}
+
 
 # use lru_cache to avoid repeating warnings endlessly;
 # this keeps track of 10 different messages and then warns again
@@ -585,23 +603,7 @@ def _collapse_subpackage_variants(
     logger.debug("preserve_top_level_loops %s", preserve_top_level_loops)
 
     # Add in some variables that should always be preserved
-    always_keep_keys = {
-        "zip_keys",
-        "pin_run_as_build",
-        "MACOSX_DEPLOYMENT_TARGET",
-        "MACOSX_SDK_VERSION",
-        "macos_min_version",
-        "macos_machine",
-        "channel_sources",
-        "channel_targets",
-        "docker_image",
-        "build_number_decrement",
-        # The following keys are required for some of our aarch64 builds
-        # Added in https://github.com/conda-forge/conda-forge-pinning-feedstock/pull/180
-        "cdt_arch",
-        "cdt_name",
-        "BUILD",
-    }
+    always_keep_keys = set() | ALWAYS_KEEP_KEYS
 
     if not is_noarch:
         always_keep_keys.add("target_platform")
@@ -855,6 +857,65 @@ def migrate_combined_spec(combined_spec, forge_dir, config, forge_config):
     return combined_spec
 
 
+def reduce_variants(recipe_path, config, input_variants):
+    """Subset of render_recipe to compute reduced variant matrix
+
+    large numbers of unused variants greatly increase render time
+    """
+
+    # from render_recipe
+    with conda_build.render.open_recipe(recipe_path) as recipe:
+        metadata = MetaData(str(recipe), config=config)
+
+    # from distribute_variants
+    # explode variants dict to list of variants
+    variants = initial_variants = conda_build.variants.get_package_variants(
+        metadata, variants=input_variants
+    )
+    logger.debug("Starting with %s input variants", len(initial_variants))
+    metadata.config.variant = variants[0]
+    metadata.config.variants = variants
+
+    # force_global finds variables in the whole recipe, not just a single output.
+    # Without this, dependencies won't be found for multi-output recipes
+    # This may not be comprehensive!
+    all_used = metadata.get_used_vars(force_global=True)
+    # conservatively, find all used vars in all scripts
+    # we can't use `output.script` because these aren't computed yet
+    # bonus: this will find output.build.script, which conda-build does not.
+    for script in Path(recipe_path).glob("*.sh"):
+        all_used.update(
+            conda_build.variants.find_used_variables_in_shell_script(
+                metadata.config.variant, script
+            )
+        )
+    for script in Path(recipe_path).glob("*.bat"):
+        all_used.update(
+            conda_build.variants.find_used_variables_in_batch_script(
+                metadata.config.variant, script
+            )
+        )
+
+    # trim unused dimensions, these cost a lot in render_recipe!
+    # because `get_used_vars` above _may_ not catch all possible used variables,
+    # only trim unused variables that increase dimensionality.
+    all_used.update(ALWAYS_KEEP_KEYS)
+    all_used.update(
+        {"target_platform", "extend_keys", "ignore_build_only_deps"}
+    )
+    new_input_variants = input_variants.copy()
+    for key, value in input_variants.items():
+        # only consider keys that increase render dimensionality for trimming at this stage
+        # so we don't have to trust all_used to find _everything_
+        if len(value) > 1 and key not in all_used:
+            logger.debug("Trimming unused dimension: %s", key)
+            new_input_variants.pop(key)
+    _trim_unused_zip_keys(new_input_variants)
+    # new_variants = metadata.get_reduced_variant_set(all_used)
+    # logger.info(f"Rendering with {len(new_variants)} input variants")
+    return new_input_variants
+
+
 def _conda_build_api_render_for_smithy(
     recipe_path,
     config=None,
@@ -887,6 +948,12 @@ def _conda_build_api_render_for_smithy(
 
     config = get_or_merge_config(config, **kwargs)
 
+    # reduce unused variants first, they get very expensive in render_recipe
+    if variants:
+        variants = reduce_variants(
+            recipe_path, config=config, input_variants=variants
+        )
+
     metadata_tuples = render_recipe(
         recipe_path,
         bypass_env_check=bypass_env_check,
@@ -896,6 +963,7 @@
         permit_unsatisfiable_variants=permit_unsatisfiable_variants,
     )
     output_metas = []
+    # reduce input variant set to those that are actually used
     for meta, download, render_in_env in metadata_tuples:
         if not meta.skip() or not config.trim_skip:
             for od, om in meta.get_output_metadata_set(
@@ -1064,6 +1132,7 @@ def _render_ci_provider(
         # CBC yaml files where variants in the migrators are not in the CBC.
         # Thus we move it out of the way.
         # TODO: upstream this as a flag in conda-build
+        logger.info("Getting variants for %s-%s", platform, arch)
         try:
             _recipe_cbc = os.path.join(
                 forge_dir,
diff --git a/news/1968_input_variants.rst b/news/1968_input_variants.rst
new file mode 100644
index 000000000..859b399ba
--- /dev/null
+++ b/news/1968_input_variants.rst
@@ -0,0 +1,3 @@
+**Fixed:**
+
+* Improved render time for large numbers of variants
diff --git a/tests/recipes/multiple_outputs/build-output1.bat b/tests/recipes/multiple_outputs/build-output1.bat
new file mode 100644
index 000000000..2c05241aa
--- /dev/null
+++ b/tests/recipes/multiple_outputs/build-output1.bat
@@ -0,0 +1,2 @@
+echo %script_all%
+echo %script_bat%
diff --git a/tests/recipes/multiple_outputs/build-output1.sh b/tests/recipes/multiple_outputs/build-output1.sh
new file mode 100644
index 000000000..7bf834200
--- /dev/null
+++ b/tests/recipes/multiple_outputs/build-output1.sh
@@ -0,0 +1 @@
+echo "${script_all}"
diff --git a/tests/recipes/multiple_outputs/build-output2.bat b/tests/recipes/multiple_outputs/build-output2.bat
new file mode 100644
index 000000000..2c05241aa
--- /dev/null
+++ b/tests/recipes/multiple_outputs/build-output2.bat
@@ -0,0 +1,2 @@
+echo %script_all%
+echo %script_bat%
diff --git a/tests/recipes/multiple_outputs/build-output2.sh b/tests/recipes/multiple_outputs/build-output2.sh
new file mode 100644
index 000000000..01c6bde97
--- /dev/null
+++ b/tests/recipes/multiple_outputs/build-output2.sh
@@ -0,0 +1,2 @@
+echo "${script_all}"
+echo "${script_only_2}"
diff --git a/tests/recipes/multiple_outputs/conda_build_config.yaml b/tests/recipes/multiple_outputs/conda_build_config.yaml
index d0750ab2a..348b98773 100644
--- a/tests/recipes/multiple_outputs/conda_build_config.yaml
+++ b/tests/recipes/multiple_outputs/conda_build_config.yaml
@@ -24,6 +24,22 @@ zlib:
 nonexistent:
   - a
   - b
+# not used; should not be picked up
+unused:
+  - a
+  - b
+# used in all output scripts
+# single value
+script_all:
+  - a
+# used in output2 sh script
+script_only_2:
+  - a2
+  - b2
+# used in both outputs, only windows
+script_bat:
+  - a2
+  - b2
 # zip these just for testing purposes
 zip_keys:
   -
diff --git a/tests/recipes/multiple_outputs/meta.yaml b/tests/recipes/multiple_outputs/meta.yaml
index 81dc942a2..5e7853fb0 100644
--- a/tests/recipes/multiple_outputs/meta.yaml
+++ b/tests/recipes/multiple_outputs/meta.yaml
@@ -14,6 +14,8 @@ requirements:
 
 outputs:
   - name: test_output_1
+    script: build-output1.sh # [not win]
+    script: build-output1.bat # [win]
     requirements:
       - bzip2
     test:
@@ -21,6 +23,8 @@ outputs:
        - test -e $PREFIX/lib/libz.so # [linux]
        - test -e $PREFIX/lib/libz.dylib # [osx]
   - name: test_output_2
+    script: build-output2.sh # [not win]
+    script: build-output2.bat # [win]
     requirements:
       host:
         - jpeg
diff --git a/tests/test_cli.py b/tests/test_cli.py
index e51027a4d..212eb56cb 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -3,6 +3,7 @@
 import os
 import shutil
 import subprocess
+from pathlib import Path
 from textwrap import dedent
 
 import pytest
@@ -114,19 +115,23 @@ def test_init_multiple_output_matrix(testing_workdir):
         temporary_directory=os.path.join(recipe, "temp"),
     )
     regen_obj(args)
-    matrix_dir = os.path.join(feedstock_dir, ".ci_support")
     # the matrix should be consolidated among all outputs, as well as the top-level
     # reqs. Only the top-level reqs should have indedependent config files,
     # though - loops within outputs are contained in those top-level configs.
-    matrix_dir_len = len(os.listdir(matrix_dir))
-    assert matrix_dir_len == 13
+    matrix_dir = Path(feedstock_dir) / ".ci_support"
+    matrix_files = list(matrix_dir.glob("*.yaml"))
+    linux_libpng16 = matrix_dir / "linux_64_libpng1.6libpq9.5.yaml"
+    assert linux_libpng16 in matrix_files
+    assert len(matrix_files) == 16
     linux_libpng16 = os.path.join(
         matrix_dir, "linux_64_libpng1.6libpq9.5.yaml"
     )
     assert os.path.isfile(linux_libpng16)
     with open(linux_libpng16) as f:
         config = yaml.safe_load(f)
+    assert "libpng" in config
     assert config["libpng"] == ["1.6"]
+    assert "libpq" in config
     assert config["libpq"] == ["9.5"]
     # this is a zipped key, but it's not used, so it shouldn't show up
     assert "libtiff" not in config
@@ -139,6 +144,32 @@
     # not show up in the final configs.
     assert "zlib" not in config
+
+    # script vars are present
+    assert "script_all" in config
+    assert config["script_all"] == ["a"]
+    assert "script_only_2" in config
+    assert config["script_only_2"] == ["a2", "b2"]
+
+    # windows script_bat shouldn't be in config
+    assert "script_bat" not in config
+
+    # check windows, which has additional axis
+    win_config = matrix_dir / "win_64_libpng1.5libpq9.6script_bata2.yaml"
+    assert win_config in matrix_files
+    with win_config.open() as f:
+        config = yaml.safe_load(f)
+
+    # script vars are present
+    assert "script_all" in config
+    assert config["script_all"] == ["a"]
+
+    # sh-only var not in windows config
+    assert "script_only_2" not in config
+
+    # windows script_bat should be in config
+    assert "script_bat" in config
+    assert config["script_bat"] == ["a2"]
 
 
 @pytest.mark.parametrize(
     "dirname", ["multiple_outputs", "multiple_outputs2", "multiple_outputs3"]
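A minimal standalone sketch of the trimming rule implemented by the new `reduce_variants`: a variant key is dropped only when it has more than one value (so it would multiply the render matrix) and nothing in the recipe or its scripts uses it. The `variants` and `used` values below are made-up examples for illustration; the real function additionally preserves `ALWAYS_KEEP_KEYS` and runs `_trim_unused_zip_keys` on the result.

```python
# Illustration only: the core "trim unused dimensions" rule from reduce_variants.
# A key is removed only if it adds a render dimension (more than one value)
# and is not referenced anywhere; single-valued keys are always kept.
variants = {
    "python": ["3.10", "3.11"],  # used, multi-valued -> kept
    "unused": ["a", "b"],        # unused, multi-valued -> trimmed
    "script_all": ["a"],         # single value -> kept regardless
}
used = {"python"}  # hypothetical result of get_used_vars plus script scanning

reduced = {
    key: values
    for key, values in variants.items()
    if len(values) <= 1 or key in used
}
assert reduced == {"python": ["3.10", "3.11"], "script_all": ["a"]}
```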