Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Module import #615

Merged
merged 18 commits into from
Jul 13, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 126 additions & 3 deletions nf_core/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import nf_core.licences
import nf_core.lint
import nf_core.list
import nf_core.modules
import nf_core.schema
import nf_core.sync

Expand Down Expand Up @@ -67,6 +68,20 @@ def decorator(f):

return decorator

def group(self, *args, **kwargs):
    """Behaves the same as `click.Group.group()` except capture
    a priority for listing command names in help.

    Args:
        *args: Positional arguments forwarded to `click.Group.group()`.
        **kwargs: Keyword arguments forwarded to `click.Group.group()`.
            The extra `help_priority` key (default 1000) is removed before
            forwarding and stored against the new group's name.

    Returns:
        A decorator that turns a function into a sub-group attached to
        this group and records its help priority.
    """
    help_priority = kwargs.pop("help_priority", 1000)
    help_priorities = self.help_priorities

    def decorator(f):
        # Delegate to group(), not command(): command() only produces a
        # group when the caller happens to pass an explicit `cls`.
        cmd = super(CustomHelpOrder, self).group(*args, **kwargs)(f)
        # Set the priority last so it wins over any default recorded
        # while the command was being registered.
        help_priorities[cmd.name] = help_priority
        return cmd

    return decorator


@click.group(cls=CustomHelpOrder)
@click.version_option(nf_core.__version__)
Expand Down Expand Up @@ -252,8 +267,116 @@ def lint(pipeline_dir, release, markdown, json):
sys.exit(1)


## nf-core module subcommands
@nf_core_cli.group(cls=CustomHelpOrder, help_priority=7)
@click.option(
    "-r", "--repository", type=str, default="nf-core/modules", help="GitHub repository hosting software wrapper modules."
)
@click.option(
    "-b",
    "--branch",
    type=str,
    default="master",
    help="Modules GitHub repo git branch to use.",
)
@click.pass_context
def modules(ctx, repository, branch):
    """
    Work with the nf-core/modules software wrappers.

    Tools to manage DSL 2 nf-core/modules software wrapper imports.
    """
    # The context object may not have been initialised yet, so make sure
    # it is a dict before storing anything on it.
    ctx.ensure_object(dict)

    # Build the repository description once here; every subcommand reads it
    # back from the shared context under the "modules_repo_obj" key.
    repo_details = nf_core.modules.ModulesRepo(repository, branch)
    ctx.obj["modules_repo_obj"] = repo_details


@modules.command(help_priority=1)
@click.pass_context
def list(ctx):
    """
    List available software modules.

    Lists all currently available software wrappers in the nf-core/modules repository.
    """
    # NOTE: this function name shadows the builtin `list` within this module.
    pipeline_modules = nf_core.modules.PipelineModules()
    # Use the repository / branch chosen on the parent `modules` command
    pipeline_modules.modules_repo = ctx.obj["modules_repo_obj"]
    available = pipeline_modules.list_modules()
    print(available)


@modules.command(help_priority=2)
@click.pass_context
@click.argument(
    "pipeline_dir",
    type=click.Path(exists=True),
    required=True,
    metavar="<pipeline directory>",
)
@click.argument("tool", type=str, required=True, metavar="<tool name>")
def install(ctx, pipeline_dir, tool):
    """
    Add a DSL2 software wrapper module to a pipeline.

    Given a software name, finds the relevant files in nf-core/modules
    and copies to the pipeline along with associated metadata.
    """
    pipeline_modules = nf_core.modules.PipelineModules()
    # Point the helper at the target pipeline and at the repository / branch
    # chosen on the parent `modules` command before installing.
    pipeline_modules.pipeline_dir = pipeline_dir
    pipeline_modules.modules_repo = ctx.obj["modules_repo_obj"]
    pipeline_modules.install(tool)


@modules.command(help_priority=3)
@click.pass_context
@click.argument("pipeline_dir", type=click.Path(exists=True), required=True, metavar="<pipeline directory>")
# The module name is optional, as described in the help text below. Without
# `required=False` click treats a default-less argument as mandatory.
@click.argument("tool", type=str, required=False, metavar="<tool name>")
@click.option("-f", "--force", is_flag=True, default=False, help="Force overwrite of files")
def update(ctx, tool, pipeline_dir, force):
    """
    Update one or all software wrapper modules.

    Compares a currently installed module against what is available in nf-core/modules.
    Fetches files and updates all relevant files for that software wrapper.

    If no module name is specified, loops through all currently installed modules.
    If no version is specified, looks for the latest available version on nf-core/modules.
    """
    mods = nf_core.modules.PipelineModules()
    # Use the repository / branch chosen on the parent `modules` command
    mods.modules_repo = ctx.obj["modules_repo_obj"]
    mods.pipeline_dir = pipeline_dir
    # `tool` is None when no module name was given on the command line
    mods.update(tool, force=force)


@modules.command(help_priority=4)
@click.pass_context
@click.argument(
    "pipeline_dir",
    type=click.Path(exists=True),
    required=True,
    metavar="<pipeline directory>",
)
@click.argument("tool", type=str, required=True, metavar="<tool name>")
def remove(ctx, pipeline_dir, tool):
    """
    Remove a software wrapper from a pipeline.
    """
    pipeline_modules = nf_core.modules.PipelineModules()
    # Point the helper at the target pipeline and at the repository / branch
    # chosen on the parent `modules` command before removing.
    pipeline_modules.pipeline_dir = pipeline_dir
    pipeline_modules.modules_repo = ctx.obj["modules_repo_obj"]
    pipeline_modules.remove(tool)


@modules.command(help_priority=5)
@click.pass_context
def check(ctx):
    """
    Check that imported module code has not been modified.

    Compares a software module against the copy on nf-core/modules.
    If any local modifications are found, the command logs an error
    and exits with a non-zero exit code.

    Use by the lint tests and automated CI to check that centralised
    software wrapper code is only modified in the central repository.
    """
    pipeline_modules = nf_core.modules.PipelineModules()
    # Only the repository / branch from the parent `modules` command is set
    # here; this command takes no pipeline directory argument.
    pipeline_modules.modules_repo = ctx.obj["modules_repo_obj"]
    pipeline_modules.check_modules()


## nf-core schema subcommands
@nf_core_cli.group(cls=CustomHelpOrder)
@nf_core_cli.group(cls=CustomHelpOrder, help_priority=8)
def schema():
"""
Suite of tools for developers to manage pipeline schema.
Expand Down Expand Up @@ -340,7 +463,7 @@ def lint(schema_path):
sys.exit(1)


@nf_core_cli.command("bump-version", help_priority=7)
@nf_core_cli.command("bump-version", help_priority=9)
@click.argument("pipeline_dir", type=click.Path(exists=True), required=True, metavar="<pipeline directory>")
@click.argument("new_version", required=True, metavar="<new version>")
@click.option(
Expand Down Expand Up @@ -374,7 +497,7 @@ def bump_version(pipeline_dir, new_version, nextflow):
nf_core.bump_version.bump_nextflow_version(lint_obj, new_version)


@nf_core_cli.command("sync", help_priority=8)
@nf_core_cli.command("sync", help_priority=10)
@click.argument("pipeline_dir", type=click.Path(exists=True), nargs=-1, metavar="<pipeline directory>")
@click.option(
"-t", "--make-template-branch", is_flag=True, default=False, help="Create a TEMPLATE branch if none is found."
Expand Down
199 changes: 199 additions & 0 deletions nf_core/modules.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
#!/usr/bin/env python
"""
Code to handle DSL2 module imports from a GitHub repository
"""

from __future__ import print_function

import base64
import logging
import os
import requests
import sys
import tempfile


class ModulesRepo(object):
    """
    Holds the name and branch of the repository that modules are taken from.

    The `nf-core modules` top-level command builds one of these from its
    -r and -b flags, so that every subcommand works with the same details.
    """

    def __init__(self, repo="nf-core/modules", branch="master"):
        # Store both values exactly as supplied; nothing is validated here
        self.branch, self.name = branch, repo


class PipelineModules(object):
    """
    Work with DSL2 software wrapper modules for a single pipeline.

    Callers create an instance, assign `modules_repo` and `pipeline_dir`,
    then call `list_modules()`, `install()` etc. The file listing is fetched
    from the GitHub API on demand.
    """

    def __init__(self):
        """
        Initialise the PipelineModules object
        """
        # Repository / branch to fetch modules from (defaults of ModulesRepo)
        self.modules_repo = ModulesRepo()
        # Path to the pipeline being worked on; must be set before install()
        self.pipeline_dir = None
        # Populated by get_modules_file_tree()
        self.modules_file_tree = {}
        self.modules_current_hash = None
        self.modules_avail_module_names = []

    def list_modules(self):
        """
        Get available module names from the GitHub tree for the repo.

        Logs a header line and returns the names; printing is left to the caller.

        Returns:
            str: Module names joined by newlines, or an empty string if no
                modules were found.
        """
        self.get_modules_file_tree()

        if not self.modules_avail_module_names:
            logging.info(
                "No available modules found in {} ({}):\n".format(self.modules_repo.name, self.modules_repo.branch)
            )
            return ""

        logging.info("Modules available from {} ({}):\n".format(self.modules_repo.name, self.modules_repo.branch))
        return "\n".join(self.modules_avail_module_names)

    def install(self, module):
        """
        Download the files of one module into the pipeline directory.

        Files are written below `<pipeline_dir>/modules/`, keeping the path
        they have in the modules repository.

        Args:
            module (string): Name of the module to install

        Returns:
            bool: True if the module files were downloaded, False if a
                check failed (the reason is logged).
        """

        # Check that we were given a pipeline
        if self.pipeline_dir is None or not os.path.exists(self.pipeline_dir):
            logging.error("Could not find pipeline: {}".format(self.pipeline_dir))
            return False
        main_nf = os.path.join(self.pipeline_dir, "main.nf")
        nf_config = os.path.join(self.pipeline_dir, "nextflow.config")
        # Only rejected when neither file is present
        if not os.path.exists(main_nf) and not os.path.exists(nf_config):
            logging.error("Could not find a main.nf or nextflow.config file in: {}".format(self.pipeline_dir))
            return False

        # Get the available modules
        self.get_modules_file_tree()

        # Check that the supplied name is an available module
        if module not in self.modules_avail_module_names:
            logging.error("Module '{}' not found in list of available modules.".format(module))
            logging.info("Use the command 'nf-core modules list' to view available software")
            return False
        logging.debug("Installing module '{}' at modules hash {}".format(module, self.modules_current_hash))

        # Check that we don't already have a folder for this module
        module_dir = os.path.join(self.pipeline_dir, "modules", "software", module)
        if os.path.exists(module_dir):
            logging.error("Module directory already exists: {}".format(module_dir))
            logging.info("To update an existing module, use the commands 'nf-core update' or 'nf-core fix'")
            return False

        # Download module files
        files = self.get_module_file_urls(module)
        logging.debug("Fetching module files:\n - {}".format("\n - ".join(files.keys())))
        for filename, api_url in files.items():
            dl_filename = os.path.join(self.pipeline_dir, "modules", filename)
            self.download_gh_file(dl_filename, api_url)
        return True

    def update(self, module, force=False):
        """Placeholder: logs that the command is not yet implemented."""
        logging.error("This command is not yet implemented")

    def remove(self, module):
        """Placeholder: logs that the command is not yet implemented."""
        logging.error("This command is not yet implemented")

    def check_modules(self):
        """Placeholder: logs that the command is not yet implemented."""
        logging.error("This command is not yet implemented")

    def get_modules_file_tree(self):
        """
        Fetch the file list from the repo, using the GitHub API

        Sets self.modules_file_tree
             self.modules_current_hash
             self.modules_avail_module_names

        Raises:
            SystemExit: If the API responds with 404 (after logging an error)
            SystemError: If the API responds with any other non-200 status
            AssertionError: If the API reports that the tree was truncated
        """
        api_url = "https://api.github.com/repos/{}/git/trees/{}?recursive=1".format(
            self.modules_repo.name, self.modules_repo.branch
        )
        r = requests.get(api_url)
        if r.status_code == 404:
            logging.error(
                "Repository / branch not found: {} ({})\n{}".format(
                    self.modules_repo.name, self.modules_repo.branch, api_url
                )
            )
            sys.exit(1)
        elif r.status_code != 200:
            raise SystemError(
                "Could not fetch {} ({}) tree: {}\n{}".format(
                    self.modules_repo.name, self.modules_repo.branch, r.status_code, api_url
                )
            )

        result = r.json()
        # Raised explicitly so the check still runs under `python -O`,
        # which strips assert statements.
        if result["truncated"]:
            raise AssertionError(
                "File tree for {} ({}) was truncated by the GitHub API".format(
                    self.modules_repo.name, self.modules_repo.branch
                )
            )

        self.modules_current_hash = result["sha"]
        self.modules_file_tree = result["tree"]
        # Rebuilt from scratch so that repeated calls do not duplicate names
        self.modules_avail_module_names = self._extract_module_names(result["tree"])

    @staticmethod
    def _extract_module_names(tree):
        """Return the module name for each 'software/<name>/main.nf' entry in a tree, skipping test/ subfolders."""
        prefix = "software/"
        suffix = "/main.nf"
        return [
            f["path"][len(prefix) : -len(suffix)]
            for f in tree
            if f["path"].startswith(prefix) and f["path"].endswith(suffix) and "/test/" not in f["path"]
        ]

    def get_module_file_urls(self, module):
        """Fetch list of URLs for a specific module

        Takes the name of a module and iterates over the GitHub repo file tree.
        Loops over items that are prefixed with the path 'software/<module_name>/' and ignores
        anything that's not a blob. Also ignores the test/ subfolder.

        Returns a dictionary with keys as filenames and values as GitHub API URIs.
        These can be used to then download file contents.

        Args:
            module (string): Name of module for which to fetch a set of URLs

        Returns:
            dict: Set of files and associated URLs as follows:

            {
                'software/fastqc/main.nf': 'https://api.github.com/repos/nf-core/modules/git/blobs/65ba598119206a2b851b86a9b5880b5476e263c3',
                'software/fastqc/meta.yml': 'https://api.github.com/repos/nf-core/modules/git/blobs/0d5afc23ba44d44a805c35902febc0a382b17651'
            }
        """
        # The trailing slash stops e.g. 'fastqc' from also matching the
        # files of a different module called 'fastqc_extra'.
        module_prefix = "software/{}/".format(module)
        return {
            f["path"]: f["url"]
            for f in self.modules_file_tree
            if f["path"].startswith(module_prefix) and f["type"] == "blob" and "/test/" not in f["path"]
        }

    def download_gh_file(self, dl_filename, api_url):
        """Download a file from GitHub using the GitHub API

        Args:
            dl_filename (string): Path to save file to
            api_url (string): GitHub API URL for file

        Raises:
            SystemError: If the API does not respond with status 200
        """

        # Make target directory if it doesn't already exist.
        # A bare filename has no directory part, and os.makedirs("") would fail.
        dl_directory = os.path.dirname(dl_filename)
        if dl_directory and not os.path.exists(dl_directory):
            os.makedirs(dl_directory)

        # Call the GitHub API
        r = requests.get(api_url)
        if r.status_code != 200:
            raise SystemError("Could not fetch {} file: {}\n {}".format(self.modules_repo.name, r.status_code, api_url))
        result = r.json()
        # The response carries the file body base64-encoded under "content"
        file_contents = base64.b64decode(result["content"])

        # Write the file contents
        with open(dl_filename, "wb") as fh:
            fh.write(file_contents)
Loading