From b007d34e6c723f7f9d6fcb5a6f58e072d4618cdf Mon Sep 17 00:00:00 2001 From: Ryan Dale Date: Fri, 8 Mar 2024 20:54:36 -0500 Subject: [PATCH] feat: bioconductor improvements (#944) Some fixes identified when preparing for the Bioconductor 3.18 updates: **run_exports.** `bioconductor-skeleton` now adds run_exports to the recipes it builds. Since it's unclear to what extent all bioconductor developers adhere to semantic versioning, I opted for `max_pin="x.x"`, even in the cases where versions start with `0.`. **Better build numbers.** `bioconductor-skeleton update-all-packages` will now only modify the build number if it's needed. It checks what's available in the channel. So running the command multiple times (sometimes needed if the update process flakes out partway through) will not keep bumping the build number. This also uses the same check as the linting, which will hopefully reduce the frequency of build number adjustments in bulk. **Fix manual build failure annotation.** Allow category & reason to show up in the manual build failure yaml written via the `bioconda-utils annotate-build-failures` CLI. **More flexible dag.** `bioconda-utils dag` now supports globs. So now, by running `bioconda-utils dag --packages bioconductor-* | grep -v INFO > dag.gml` and then loading `dag.gml` into [Gephi](https://gephi.org/), you can visually identify which packages should be prioritized for building first. This example sizes nodes by out-degree and labels only those with >125 outgoing edges. ![image](https://github.com/bioconda/bioconda-utils/assets/115406/f2fd2381-59eb-4611-9f67-38d69f731a87) **And minor stuff.** The pinning for bioconductor-data-packages needed a space; there was also an empty build-failure yaml, which crashed osx builds. 
--- bioconda_utils/bioconductor_skeleton.py | 19 ++++++++++++++++--- bioconda_utils/build_failure.py | 14 +++++++++++--- bioconda_utils/bulk.py | 2 +- bioconda_utils/cli.py | 18 ++++++------------ 4 files changed, 34 insertions(+), 19 deletions(-) diff --git a/bioconda_utils/bioconductor_skeleton.py b/bioconda_utils/bioconductor_skeleton.py index 32dec40994..9a98fedca1 100755 --- a/bioconda_utils/bioconductor_skeleton.py +++ b/bioconda_utils/bioconductor_skeleton.py @@ -774,7 +774,7 @@ def dependencies(self): dependency_mapping[prefix + name.lower() + version] = name # Check SystemRequirements in the DESCRIPTION file to make sure - # packages with such reqquirements are provided correct recipes. + # packages with such requirements are provided correct recipes. if (self.packages[self.package].get('SystemRequirements') is not None): logger.warning( "The 'SystemRequirements' {} are needed".format( @@ -940,7 +940,7 @@ def sub_placeholders(x): additional_run_deps = [] if self.is_data_package: additional_run_deps.append('curl') - additional_run_deps.append('bioconductor-data-packages>={}'.format(date.today().strftime('%Y%m%d'))) + additional_run_deps.append('bioconductor-data-packages >={}'.format(date.today().strftime('%Y%m%d'))) d = OrderedDict(( ( @@ -959,6 +959,7 @@ def sub_placeholders(x): 'build', OrderedDict(( ('number', self.build_number), ('rpaths', ['lib/R/lib/', 'lib/']), + ('run_exports', f'{{{{ pin_subpackage("bioconductor-{self.package_lower}", max_pin="x.x") }}}}'), )), ), ( @@ -1248,7 +1249,19 @@ def write_recipe(package, recipe_dir, config, bioc_data_packages=None, force=Fal (updated_version == current_version) and (updated_meta != current_meta) ): - proj.build_number = int(current_build_number) + 1 + # Sometimes when updating all packages, the updating process fails + # partway. Re-running the updating process should not bump the + # build number if no builds for this version exist yet in the repo. 
+ existing_bldnos = utils.RepoData().get_package_data( + key="build_number", + name="bioconductor-" + proj.package.lower(), + version=updated_version + ) + if not existing_bldnos: + proj.build_number = 0 + else: + proj.build_number = sorted([int(i) for i in existing_bldnos]) [-1] + 1 + if 'extra' in current_meta: exclude = set(['final', 'copy_test_source_files']) proj.extra = {x: y for x, y in current_meta['extra'].items() if x not in exclude} diff --git a/bioconda_utils/build_failure.py b/bioconda_utils/build_failure.py index d2e7da2804..e9f722f001 100644 --- a/bioconda_utils/build_failure.py +++ b/bioconda_utils/build_failure.py @@ -38,6 +38,8 @@ def __init__(self, recipe: Union[str, Recipe], platform: Optional[str]=None): self.platform = platform def load(path): + if os.path.getsize(path) == 0: + raise IOError("Unable to read build failure record {path}: empty file") with open(path, "r") as f: yaml=YAML() try: @@ -89,20 +91,26 @@ def write(self): with open(self.path, "w") as f: yaml=YAML() commented_map = CommentedMap() - commented_map.insert(0, "recipe_sha", self.recipe_sha, comment="The commit at which this recipe failed to build.") + commented_map.insert(0, "recipe_sha", self.recipe_sha, comment="The hash of the recipe's meta.yaml at which this recipe failed to build.") commented_map.insert(1, "skiplist", self.skiplist, comment="Set to true to skiplist this recipe so that it will be ignored as long as its latest commit is the one given above.") i = 2 - if self.log: + + _log = self.inner.get("log", "") + if _log: commented_map.insert( i, "log", # remove invalid chars and keep only the last 100 lines - LiteralScalarString("\n".join(utils.yaml_remove_invalid_chars(self.log).splitlines()[-100:])), + LiteralScalarString("\n".join(utils.yaml_remove_invalid_chars(_log).splitlines()[-100:])), comment="Last 100 lines of the build log." 
) i += 1 if self.reason: commented_map.insert(i, "reason", LiteralScalarString(self.reason)) + i += 1 + if self.category: + commented_map.insert(i, "category", LiteralScalarString(self.category)) + i += 1 yaml.dump(commented_map, f) def remove(self): diff --git a/bioconda_utils/bulk.py b/bioconda_utils/bulk.py index b55a1c826c..59aa919cc0 100644 --- a/bioconda_utils/bulk.py +++ b/bioconda_utils/bulk.py @@ -8,7 +8,7 @@ def check_branch(): branch = utils.run(["git", "rev-parse", "--abbrev-ref", "HEAD"], mask=False).stdout if branch != "bulk": - logger.error("bulk-run-ci has to be executed on a checkout of the bulk branch") + logger.error("bulk-trigger-ci has to be executed on a checkout of the bulk branch") exit(1) diff --git a/bioconda_utils/cli.py b/bioconda_utils/cli.py index 8007421258..ee7af2b36e 100644 --- a/bioconda_utils/cli.py +++ b/bioconda_utils/cli.py @@ -568,9 +568,7 @@ def dag(recipe_folder, config, packages="*", format='gml', hide_singletons=False """ Export the DAG of packages to a graph format file for visualization """ - dag, name2recipes = graph.build(utils.get_recipes(recipe_folder, "*"), config) - if packages != "*": - dag = graph.filter(dag, packages) + dag, name2recipes = graph.build(utils.get_recipes(recipe_folder, packages), config) if hide_singletons: for node in nx.nodes(dag): if dag.degree(node) == 0: @@ -1100,15 +1098,11 @@ def list_build_failures(recipe_folder, config, channel=None, output_format=None, fmt_writer(df, sys.stdout, index=False) -@arg( - 'message', - help="The commit message. Will be prepended with [ci skip] to avoid that commits accidentally trigger a rerun while bulk is already running" -) -def bulk_commit(message): - bulk.commit(message) - - def bulk_trigger_ci(): + """ + Create an empty commit with the string "[ci run]" and push, which + triggers a bulk CI run. Must be on the `bulk` branch. 
+ """ bulk.trigger_ci() @@ -1120,5 +1114,5 @@ def main(): build, dag, dependent, do_lint, duplicates, update_pinning, bioconductor_skeleton, clean_cran_skeleton, autobump, handle_merged_pr, annotate_build_failures, list_build_failures, - bulk_commit, bulk_trigger_ci + bulk_trigger_ci ])