From 6dd1e76dbafc6829e36bca395e1773c4949f7ffd Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sun, 23 Feb 2020 20:48:33 +0100 Subject: [PATCH 1/6] add python requirements and script to reserve terms; closes #1 --- requirements.txt | 1 + scripts/reserve-terms.py | 235 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 236 insertions(+) create mode 100644 requirements.txt create mode 100755 scripts/reserve-terms.py diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..b450057 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +requests==2.23.0 diff --git a/scripts/reserve-terms.py b/scripts/reserve-terms.py new file mode 100755 index 0000000..c85d69e --- /dev/null +++ b/scripts/reserve-terms.py @@ -0,0 +1,235 @@ +#!/usr/bin/env python3 + +import argparse +import base64 +import json +import logging +import os +import re +import requests +import sys +import textwrap + +from urllib.parse import urlencode + +PWD = os.path.dirname(os.path.realpath(__file__)) + +GITHUB_API_ACCESS_TOKEN = os.getenv('GITHUB_API_ACCESS_TOKEN') +if not GITHUB_API_ACCESS_TOKEN: + raise Exception("Could not determine GITHUB_API_ACCESS_TOKEN") + +GITHUB_API_URL = 'https://api.github.com' +GITHUB_API_DEFAULT_HEADERS = {'Authorization': 'token ' + GITHUB_API_ACCESS_TOKEN, + 'Accept': 'application/vnd.github.v3+json'} + +GITHUB_REPO = 'gizmos' +GITHUB_OWNER = 'ontodev' +GITHUB_BRANCH = 'term-ids' +PUBLISHED_FILE = 'published-terms.txt' +RESERVED_FILE = 'reserved-terms.txt' + +# Initialize the logger: +logging.basicConfig(format='%(asctime)-15s %(name)s %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) +logger.setLevel(logging.WARN) + + +def github_get(endpoint, parameters={}): + """ + Make a GET request to the Github API for the given endpoint with the given parameters + """ + endpoint = endpoint.strip('/') + query_string = "?" 
+ urlencode(parameters) if parameters else "" + url = "{}/{}{}".format(GITHUB_API_URL, endpoint, query_string) + + logger.debug("Sending GET request: {}".format(url)) + response = requests.get(url, headers=GITHUB_API_DEFAULT_HEADERS) + if response.status_code != requests.codes.ok: + response.raise_for_status() + return response.json() + + +def github_post(endpoint, data): + """ + Make a POST request to the Github API for the given endpoint with the given data + """ + endpoint = endpoint.strip('/') + url = "{}/{}".format(GITHUB_API_URL, endpoint) + + try: + data = json.dumps(data) + except Exception as e: + logger.error("Unable to convert {} to JSON.") + sys.exit(1) + + logger.debug("Sending POST request: {} with data: {}".format(url, data)) + response = requests.post(url, headers=GITHUB_API_DEFAULT_HEADERS, data=data) + if response.status_code != requests.codes.ok: + response.raise_for_status() + return response.json() + + +def github_put(endpoint, data): + """ + Make a PUT request to the Github API for the given endpoint with the given data + """ + endpoint = endpoint.strip('/') + url = "{}/{}".format(GITHUB_API_URL, endpoint) + + try: + data = json.dumps(data) + except Exception as e: + logger.error("Unable to convert {} to JSON.") + sys.exit(1) + + logger.debug("Sending PUT request: {} with data: {}".format(url, data)) + response = requests.put(url, headers=GITHUB_API_DEFAULT_HEADERS, data=data) + if response.status_code != requests.codes.ok: + response.raise_for_status() + return response.json() + + +def get_term_files_content(): + """ + For the lists of published and reserved commits stored in github, return their contents and + the SHA used by github to identify their respective files. 
+ """ + logger.info("Retrieving currently published and reserved terms ...") + + info_to_return = {} + for filename in [PUBLISHED_FILE, RESERVED_FILE]: + response = github_get("/repos/{owner}/{repo}/contents/{path}" + .format(owner=GITHUB_OWNER, repo=GITHUB_REPO, path=filename), + {'ref': GITHUB_BRANCH}) + if 'sha' not in response: + raise Exception("Could not determine SHA for {}".format(filename)) + if 'content' not in response: + raise Exception("No file content found for {}".format(filename)) + + decodedBytes = base64.b64decode(response['content']) + info_to_return[filename] = {'sha': response['sha'], + 'content': str(decodedBytes, "utf-8").strip('\n')} + return info_to_return + + +def commit_reserved(content, commit_msg, sha): + """ + Given new content to save to the reserved terms list, a commit message, and the SHA that github + uses to identify the file for the reserved terms list, create a commit and then return the URL + for that commit. + """ + logger.info("Committing to {}/{}/{}".format(GITHUB_OWNER, GITHUB_REPO, RESERVED_FILE)) + response = github_put('/repos/{}/{}/contents/{}'.format(GITHUB_OWNER, GITHUB_REPO, RESERVED_FILE), + {'message': commit_msg, + 'content': base64.b64encode(content.encode("utf-8")).decode(), + 'branch': GITHUB_BRANCH, + 'sha': sha}) + + if 'commit' not in response or 'html_url' not in response['commit']: + logger.error("Unable to extract 'html_url' from successful response.") + logger.debug("Response was: {}".format(response)) + return None + return response['commit']['html_url'] + + +def get_next_ontology_id(terms_to_add, current_terms): + """ + Given a list of current terms, return the next unique ontology id to use for subsequent term + additions, while verifying that none of the given terms to be added already exist. 
+ """ + used_ids = [] + used_terms = {} + for filename in [PUBLISHED_FILE, RESERVED_FILE]: + used_terms[filename] = [] + for line in current_terms[filename]['content'].splitlines(): + line = line.strip() + matched = re.match(r"^OBI:(\d+)\s+(.+)", line) + if not matched: + logger.warning("Ignoring line: '{}' in {} that could not be parsed." + .format(line, filename)) + else: + used_ids.append(int(matched[1])) + used_term = matched[2] + if used_term in terms_to_add: + logger.error("Proposed new term: '{}' already exists in {}. Exiting." + .format(used_term, filename)) + sys.exit(1) + used_terms[filename].append(used_term) + + return (sorted(used_ids).pop() + 1) if used_ids else 1 + + +def prepare_new_reserved_term_content(current_reserved_content, terms_to_add, next_id): + """ + Append the given terms to add to the content of the current reserved list of terms, using ids + beginning at the given next_id, and return the new list. + """ + new_reserved_term_content = current_reserved_content + for i in range(0, len(terms_to_add)): + next_line = "OBI:{} {}".format(str(next_id + i).zfill(7), terms_to_add[i]) + print("Adding {}".format(next_line)) + if new_reserved_term_content != "": + new_reserved_term_content += "\n" + new_reserved_term_content += next_line + + return new_reserved_term_content + + +def main(): + description = textwrap.dedent(''' + Read a number of terms either from the command line or from a file (containing one term per line) + and add them to the list of reserved terms in {}/{}/{} on the branch {}, checking first to verify + that none of the terms are either published (in {}) or already reserved. If no commit message is + specified, the user will be prompted to supply one.''' + .format(GITHUB_OWNER, GITHUB_REPO, RESERVED_FILE, GITHUB_BRANCH, PUBLISHED_FILE)) + parser = argparse.ArgumentParser(description=description) + + parser.add_argument('--commit_message', metavar='MESSAGE', + help=('The message describing the commit in Github. 
It should include a ' + 'comment with a GitHub issue or PR number (e.g. #1234).')) + + term_args = parser.add_mutually_exclusive_group(required=True) + term_args.add_argument('--terms', metavar='TERM', nargs='+', + help=('A list of terms to add, separated by spaces. If a term contains ' + 'spaces it should be surounded by single or double quotes')) + + term_args.add_argument('--input', type=argparse.FileType('r'), + help='A file containing a list of terms to add, one per line') + args = vars(parser.parse_args()) + + if args.get('input'): + # Ignore any empty lines. + terms_to_add = [l.strip() for l in args.get('input').readlines() if l.strip() != ""] + else: + terms_to_add = args.get('terms') + + # This might happen if the terms are given through an input file and it is empty: + if not terms_to_add: + logger.error("No terms specified.") + sys.exit(1) + + # Prompt the user if no commit message was supplied: + commit_msg = args.get('commit_message') + if not commit_msg: + try: + commit_msg = input("Please enter a commit message: ").strip() + if not commit_msg: + print("A commit message is required.") + sys.exit(1) + except KeyboardInterrupt: + sys.exit(1) + + # Retrieve the currently published and reserved terms: + current_terms = get_term_files_content() + # Determine the next id to use based on the current list: + next_id = get_next_ontology_id(terms_to_add, current_terms) + # Prepare the contents of the file listing reserved commits (including the new ones): + new_reserved_term_content = prepare_new_reserved_term_content( + current_terms[RESERVED_FILE]['content'], terms_to_add, next_id) + # Commit the file and inform the user where (s)he can view the commit contents: + url = commit_reserved(new_reserved_term_content, commit_msg, current_terms[RESERVED_FILE]['sha']) + print("Commit successful. 
You can review it on github at: {}".format(url)) + + +if __name__ == '__main__': + main() From a44d882a96220424a1ff7d1fa06c9c58d1cef3ab Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sun, 23 Feb 2020 20:55:04 +0100 Subject: [PATCH 2/6] make small formatting etc. changes suggested by pycodestyle and pyflakes; still closes #1 --- scripts/reserve-terms.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/scripts/reserve-terms.py b/scripts/reserve-terms.py index c85d69e..134604c 100755 --- a/scripts/reserve-terms.py +++ b/scripts/reserve-terms.py @@ -12,7 +12,6 @@ from urllib.parse import urlencode -PWD = os.path.dirname(os.path.realpath(__file__)) GITHUB_API_ACCESS_TOKEN = os.getenv('GITHUB_API_ACCESS_TOKEN') if not GITHUB_API_ACCESS_TOKEN: @@ -58,7 +57,7 @@ def github_post(endpoint, data): try: data = json.dumps(data) - except Exception as e: + except Exception: logger.error("Unable to convert {} to JSON.") sys.exit(1) @@ -78,7 +77,7 @@ def github_put(endpoint, data): try: data = json.dumps(data) - except Exception as e: + except Exception: logger.error("Unable to convert {} to JSON.") sys.exit(1) @@ -180,8 +179,8 @@ def main(): Read a number of terms either from the command line or from a file (containing one term per line) and add them to the list of reserved terms in {}/{}/{} on the branch {}, checking first to verify that none of the terms are either published (in {}) or already reserved. 
If no commit message is - specified, the user will be prompted to supply one.''' - .format(GITHUB_OWNER, GITHUB_REPO, RESERVED_FILE, GITHUB_BRANCH, PUBLISHED_FILE)) + specified, the user will be prompted to supply one.'''.format( + GITHUB_OWNER, GITHUB_REPO, RESERVED_FILE, GITHUB_BRANCH, PUBLISHED_FILE)) parser = argparse.ArgumentParser(description=description) parser.add_argument('--commit_message', metavar='MESSAGE', @@ -190,11 +189,11 @@ def main(): term_args = parser.add_mutually_exclusive_group(required=True) term_args.add_argument('--terms', metavar='TERM', nargs='+', - help=('A list of terms to add, separated by spaces. If a term contains ' - 'spaces it should be surounded by single or double quotes')) + help=('A list of terms to add, separated by spaces. If a term contains ' + 'spaces it should be surounded by single or double quotes')) term_args.add_argument('--input', type=argparse.FileType('r'), - help='A file containing a list of terms to add, one per line') + help='A file containing a list of terms to add, one per line') args = vars(parser.parse_args()) if args.get('input'): From a3374699b1de10d902f5edd9011940edb0cd2cbf Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sun, 23 Feb 2020 21:05:19 +0100 Subject: [PATCH 3/6] add small further safeguard against empty commit message --- scripts/reserve-terms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/reserve-terms.py b/scripts/reserve-terms.py index 134604c..b48fc8c 100755 --- a/scripts/reserve-terms.py +++ b/scripts/reserve-terms.py @@ -209,7 +209,7 @@ def main(): # Prompt the user if no commit message was supplied: commit_msg = args.get('commit_message') - if not commit_msg: + if not commit_msg or not commit_msg.strip(): try: commit_msg = input("Please enter a commit message: ").strip() if not commit_msg: From 957ed9e9da4f75e5148d52ab9194811692a636fc Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Wed, 26 Feb 2020 12:58:47 +0100 Subject: [PATCH 4/6] change 
--commit_message to --message, change --terms to --labels, add short options for: -m -l -i --- scripts/reserve-terms.py | 68 +++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/scripts/reserve-terms.py b/scripts/reserve-terms.py index b48fc8c..afb4437 100755 --- a/scripts/reserve-terms.py +++ b/scripts/reserve-terms.py @@ -88,9 +88,9 @@ def github_put(endpoint, data): return response.json() -def get_term_files_content(): +def get_terms_files_contents(): """ - For the lists of published and reserved commits stored in github, return their contents and + For the lists of published and reserved terms stored in github, return their contents and the SHA used by github to identify their respective files. """ logger.info("Retrieving currently published and reserved terms ...") @@ -131,15 +131,14 @@ def commit_reserved(content, commit_msg, sha): return response['commit']['html_url'] -def get_next_ontology_id(terms_to_add, current_terms): +def get_next_ontology_id(labels_to_add, current_terms): """ Given a list of current terms, return the next unique ontology id to use for subsequent term - additions, while verifying that none of the given terms to be added already exist. + additions, while verifying that none of the current terms have a label in the list of labels to + be added. """ used_ids = [] - used_terms = {} for filename in [PUBLISHED_FILE, RESERVED_FILE]: - used_terms[filename] = [] for line in current_terms[filename]['content'].splitlines(): line = line.strip() matched = re.match(r"^OBI:(\d+)\s+(.+)", line) @@ -148,24 +147,23 @@ def get_next_ontology_id(terms_to_add, current_terms): .format(line, filename)) else: used_ids.append(int(matched[1])) - used_term = matched[2] - if used_term in terms_to_add: - logger.error("Proposed new term: '{}' already exists in {}. Exiting." 
- .format(used_term, filename)) + used_label = matched[2] + if used_label in labels_to_add: + logger.error("Proposed new label: '{}' already exists in {}. Exiting." + .format(used_label, filename)) sys.exit(1) - used_terms[filename].append(used_term) return (sorted(used_ids).pop() + 1) if used_ids else 1 -def prepare_new_reserved_term_content(current_reserved_content, terms_to_add, next_id): +def prepare_new_reserved_term_content(current_reserved_content, labels_to_add, next_id): """ - Append the given terms to add to the content of the current reserved list of terms, using ids + Append terms for the given labels to the content of the current reserved list of terms, using ids beginning at the given next_id, and return the new list. """ new_reserved_term_content = current_reserved_content - for i in range(0, len(terms_to_add)): - next_line = "OBI:{} {}".format(str(next_id + i).zfill(7), terms_to_add[i]) + for i in range(0, len(labels_to_add)): + next_line = "OBI:{} {}".format(str(next_id + i).zfill(7), labels_to_add[i]) print("Adding {}".format(next_line)) if new_reserved_term_content != "": new_reserved_term_content += "\n" @@ -176,39 +174,39 @@ def prepare_new_reserved_term_content(current_reserved_content, terms_to_add, ne def main(): description = textwrap.dedent(''' - Read a number of terms either from the command line or from a file (containing one term per line) - and add them to the list of reserved terms in {}/{}/{} on the branch {}, checking first to verify - that none of the terms are either published (in {}) or already reserved. If no commit message is - specified, the user will be prompted to supply one.'''.format( + Read a number of labels either from the command line or from a file (containing one label per + line) and add corresponding terms to the list of reserved terms in {}/{}/{} on the branch {}, + checking first to verify that there are no terms either already published or reserved with those + labels. 
If no commit message is specified, the user will be prompted to supply one.'''.format( GITHUB_OWNER, GITHUB_REPO, RESERVED_FILE, GITHUB_BRANCH, PUBLISHED_FILE)) parser = argparse.ArgumentParser(description=description) - parser.add_argument('--commit_message', metavar='MESSAGE', + parser.add_argument('-m', '--message', metavar='MESSAGE', help=('The message describing the commit in Github. It should include a ' 'comment with a GitHub issue or PR number (e.g. #1234).')) - term_args = parser.add_mutually_exclusive_group(required=True) - term_args.add_argument('--terms', metavar='TERM', nargs='+', - help=('A list of terms to add, separated by spaces. If a term contains ' - 'spaces it should be surounded by single or double quotes')) + label_args = parser.add_mutually_exclusive_group(required=True) + label_args.add_argument('-l', '--labels', metavar='LABEL', nargs='+', + help=('A list of labels to add, separated by spaces. If a label contains ' + 'spaces it should be surounded by single or double quotes')) - term_args.add_argument('--input', type=argparse.FileType('r'), - help='A file containing a list of terms to add, one per line') + label_args.add_argument('-i', '--input', type=argparse.FileType('r'), + help='A file containing a list of labels to add, one per line') args = vars(parser.parse_args()) if args.get('input'): # Ignore any empty lines. 
- terms_to_add = [l.strip() for l in args.get('input').readlines() if l.strip() != ""] + labels_to_add = [l.strip() for l in args.get('input').readlines() if l.strip() != ""] else: - terms_to_add = args.get('terms') + labels_to_add = args.get('labels') - # This might happen if the terms are given through an input file and it is empty: - if not terms_to_add: - logger.error("No terms specified.") + # This might happen if the labels are given through an input file and it is empty: + if not labels_to_add: + logger.error("No labels specified.") sys.exit(1) # Prompt the user if no commit message was supplied: - commit_msg = args.get('commit_message') + commit_msg = args.get('message') if not commit_msg or not commit_msg.strip(): try: commit_msg = input("Please enter a commit message: ").strip() @@ -219,12 +217,12 @@ def main(): sys.exit(1) # Retrieve the currently published and reserved terms: - current_terms = get_term_files_content() + current_terms = get_terms_files_contents() # Determine the next id to use based on the current list: - next_id = get_next_ontology_id(terms_to_add, current_terms) + next_id = get_next_ontology_id(labels_to_add, current_terms) # Prepare the contents of the file listing reserved commits (including the new ones): new_reserved_term_content = prepare_new_reserved_term_content( - current_terms[RESERVED_FILE]['content'], terms_to_add, next_id) + current_terms[RESERVED_FILE]['content'], labels_to_add, next_id) # Commit the file and inform the user where (s)he can view the commit contents: url = commit_reserved(new_reserved_term_content, commit_msg, current_terms[RESERVED_FILE]['sha']) print("Commit successful. 
You can review it on github at: {}".format(url)) From 55aa767bada00d997643b632c7d42b46b80b7906 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Wed, 26 Feb 2020 14:54:00 +0100 Subject: [PATCH 5/6] read labels from stdin if neither -i nor -l has been supplied --- scripts/reserve-terms.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/scripts/reserve-terms.py b/scripts/reserve-terms.py index afb4437..9ea7b70 100755 --- a/scripts/reserve-terms.py +++ b/scripts/reserve-terms.py @@ -185,7 +185,7 @@ def main(): help=('The message describing the commit in Github. It should include a ' 'comment with a GitHub issue or PR number (e.g. #1234).')) - label_args = parser.add_mutually_exclusive_group(required=True) + label_args = parser.add_mutually_exclusive_group(required=False) label_args.add_argument('-l', '--labels', metavar='LABEL', nargs='+', help=('A list of labels to add, separated by spaces. If a label contains ' 'spaces it should be surounded by single or double quotes')) @@ -194,11 +194,17 @@ def main(): help='A file containing a list of labels to add, one per line') args = vars(parser.parse_args()) - if args.get('input'): - # Ignore any empty lines. - labels_to_add = [l.strip() for l in args.get('input').readlines() if l.strip() != ""] - else: - labels_to_add = args.get('labels') + # When --input is not specified, we will be reading labels from stdin, and this will interfere + # with reading the commit message. 
So we force the user to supply a message if he hasn't supplied + # an input: + if not args.get('input') and not args.get('message'): + print("The --message option must be specified when the --input option is omitted.") + sys.exit(1) + + labels_to_add = args.get('labels') + if not labels_to_add: + input_stream = args.get('input') or sys.stdin + labels_to_add = [l.strip() for l in input_stream.readlines() if l.strip() != ""] # This might happen if the labels are given through an input file and it is empty: if not labels_to_add: From 12ae7636c9eb61bada80147319163ad11b59fe7f Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Wed, 26 Feb 2020 17:47:36 +0100 Subject: [PATCH 6/6] move github owner, repository, branch, published and reserved file names, idspace and id digits to config file --- requirements.txt | 1 + scripts/gizmos.yml | 10 +++++ scripts/reserve-terms.py | 94 +++++++++++++++++++++++++++++----------- 3 files changed, 80 insertions(+), 25 deletions(-) create mode 100644 scripts/gizmos.yml diff --git a/requirements.txt b/requirements.txt index b450057..38c5f8e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ requests==2.23.0 +PyYAML==5.3 diff --git a/scripts/gizmos.yml b/scripts/gizmos.yml new file mode 100644 index 0000000..e21a00b --- /dev/null +++ b/scripts/gizmos.yml @@ -0,0 +1,10 @@ +idspace: OBI +id_digits: 7 +github_repo: gizmos +github_owner: ontodev +# Branch in the repository that has the files containing the lists of published and reserved terms: +repo_branch: term-ids +# File in the repository on `repo_branch` containing the list of published terms: +published_file: published-terms.txt +# File in the repository on `repo_branch` containing the list of reserved terms: +reserved_file: reserved-terms.txt diff --git a/scripts/reserve-terms.py b/scripts/reserve-terms.py index 9ea7b70..6942a5d 100755 --- a/scripts/reserve-terms.py +++ b/scripts/reserve-terms.py @@ -9,23 +9,26 @@ import requests import sys import textwrap +import yaml 
from urllib.parse import urlencode +# Github API-related parameters: GITHUB_API_ACCESS_TOKEN = os.getenv('GITHUB_API_ACCESS_TOKEN') if not GITHUB_API_ACCESS_TOKEN: - raise Exception("Could not determine GITHUB_API_ACCESS_TOKEN") + print("Please set environment variable GITHUB_API_ACCESS_TOKEN before running this script.") + sys.exit(1) GITHUB_API_URL = 'https://api.github.com' GITHUB_API_DEFAULT_HEADERS = {'Authorization': 'token ' + GITHUB_API_ACCESS_TOKEN, 'Accept': 'application/vnd.github.v3+json'} -GITHUB_REPO = 'gizmos' -GITHUB_OWNER = 'ontodev' -GITHUB_BRANCH = 'term-ids' -PUBLISHED_FILE = 'published-terms.txt' -RESERVED_FILE = 'reserved-terms.txt' +# The directory where this script is located: +pwd = os.path.dirname(os.path.realpath(__file__)) + +# Initialize a global configuration map, which will be loaded in main(): +config = {} # Initialize the logger: logging.basicConfig(format='%(asctime)-15s %(name)s %(levelname)s - %(message)s') @@ -96,10 +99,12 @@ def get_terms_files_contents(): logger.info("Retrieving currently published and reserved terms ...") info_to_return = {} - for filename in [PUBLISHED_FILE, RESERVED_FILE]: - response = github_get("/repos/{owner}/{repo}/contents/{path}" - .format(owner=GITHUB_OWNER, repo=GITHUB_REPO, path=filename), - {'ref': GITHUB_BRANCH}) + for filename in [config['published_file'], config['reserved_file']]: + response = github_get( + "/repos/{owner}/{repo}/contents/{path}" + .format(owner=config['github_owner'], repo=config['github_repo'], path=filename), + {'ref': config['repo_branch']}) + if 'sha' not in response: raise Exception("Could not determine SHA for {}".format(filename)) if 'content' not in response: @@ -117,11 +122,15 @@ def commit_reserved(content, commit_msg, sha): uses to identify the file for the reserved terms list, create a commit and then return the URL for that commit. 
""" - logger.info("Committing to {}/{}/{}".format(GITHUB_OWNER, GITHUB_REPO, RESERVED_FILE)) - response = github_put('/repos/{}/{}/contents/{}'.format(GITHUB_OWNER, GITHUB_REPO, RESERVED_FILE), + logger.info("Committing to {}/{}/{}".format( + config['github_owner'], config['github_repo'], config['reserved_file'])) + + response = github_put('/repos/{}/{}/contents/{}'.format(config['github_owner'], + config['github_repo'], + config['reserved_file']), {'message': commit_msg, 'content': base64.b64encode(content.encode("utf-8")).decode(), - 'branch': GITHUB_BRANCH, + 'branch': config['repo_branch'], 'sha': sha}) if 'commit' not in response or 'html_url' not in response['commit']: @@ -138,10 +147,10 @@ def get_next_ontology_id(labels_to_add, current_terms): be added. """ used_ids = [] - for filename in [PUBLISHED_FILE, RESERVED_FILE]: + for filename in [config['published_file'], config['reserved_file']]: for line in current_terms[filename]['content'].splitlines(): line = line.strip() - matched = re.match(r"^OBI:(\d+)\s+(.+)", line) + matched = re.match(r"^{idspace}:(\d+)\s+(.+)".format(idspace=config['idspace']), line) if not matched: logger.warning("Ignoring line: '{}' in {} that could not be parsed." 
.format(line, filename)) @@ -163,7 +172,9 @@ def prepare_new_reserved_term_content(current_reserved_content, labels_to_add, n """ new_reserved_term_content = current_reserved_content for i in range(0, len(labels_to_add)): - next_line = "OBI:{} {}".format(str(next_id + i).zfill(7), labels_to_add[i]) + next_line = "{}:{} {}".format(config['idspace'], + str(next_id + i).zfill(config['id_digits']), + labels_to_add[i]) print("Adding {}".format(next_line)) if new_reserved_term_content != "": new_reserved_term_content += "\n" @@ -173,13 +184,26 @@ def prepare_new_reserved_term_content(current_reserved_content, labels_to_add, n def main(): - description = textwrap.dedent(''' - Read a number of labels either from the command line or from a file (containing one label per - line) and add corresponding terms to the list of reserved terms in {}/{}/{} on the branch {}, - checking first to verify that there are no terms either already published or reserved with those - labels. If no commit message is specified, the user will be prompted to supply one.'''.format( - GITHUB_OWNER, GITHUB_REPO, RESERVED_FILE, GITHUB_BRANCH, PUBLISHED_FILE)) - parser = argparse.ArgumentParser(description=description) + parser = argparse.ArgumentParser( + formatter_class=argparse.RawTextHelpFormatter, + description=textwrap.dedent(''' + Reads a number of labels either from the command line or from a local file (containing one label + per line) and adds corresponding terms to a remote file (containing the list of currently + reserved terms) located on a specific branch of a particular repository. If any of the supplied + labels are already published or reserved, the script exits with an error without modifying the + reserved list. + + The name of the remote file, branch, and repository are read from a configuration file which can + be specified on the command line, or which otherwise defaults to 'gizmos.yml' in the same + directory as this script. 
+ + The update of the reserved list in the repository will be accompanied by a commit message. If + no commit message has been given on the command line, the user will be prompted to supply one. + ''')) + + parser.add_argument( + '-c', '--config', metavar='FILE', + help="Read configuration from FILE instead of from '{}/gizmos.yml'".format(pwd)) parser.add_argument('-m', '--message', metavar='MESSAGE', help=('The message describing the commit in Github. It should include a ' @@ -194,6 +218,25 @@ def main(): help='A file containing a list of labels to add, one per line') args = vars(parser.parse_args()) + # Load the configuration either from the user-specified file or from the default location: + global config + config_filename = args.get('config') or (pwd + "/gizmos.yml") + with open(config_filename) as yaml_file: + config = yaml.load(yaml_file, Loader=yaml.SafeLoader) + + # Verify that the configuration contains all the required parameters: + required_params = ['idspace', 'github_repo', 'github_owner'] + if any([config.get(param) is None for param in required_params]): + print("Invalid configuration. One or more of the following was not specified: {}" + .format(', '.join(required_params))) + sys.exit(1) + + # Read these from the config file or use the defaults specified below: + config['id_digits'] = config.get('id_digits') or 7 + config['repo_branch'] = config.get('repo_branch') or 'term-ids' + config['published_file'] = config.get('published_file') or 'published-terms.txt' + config['reserved_file'] = config.get('reserved_file') or 'reserved-terms.txt' + # When --input is not specified, we will be reading labels from stdin, and this will interfere # with reading the commit message. 
So we force the user to supply a message if he hasn't supplied # an input: @@ -228,9 +271,10 @@ def main(): next_id = get_next_ontology_id(labels_to_add, current_terms) # Prepare the contents of the file listing reserved commits (including the new ones): new_reserved_term_content = prepare_new_reserved_term_content( - current_terms[RESERVED_FILE]['content'], labels_to_add, next_id) + current_terms[config['reserved_file']]['content'], labels_to_add, next_id) # Commit the file and inform the user where (s)he can view the commit contents: - url = commit_reserved(new_reserved_term_content, commit_msg, current_terms[RESERVED_FILE]['sha']) + url = commit_reserved(new_reserved_term_content, commit_msg, + current_terms[config['reserved_file']]['sha']) print("Commit successful. You can review it on github at: {}".format(url))