diff --git a/README.md b/README.md index 7ba69ae..33b12cd 100644 --- a/README.md +++ b/README.md @@ -38,10 +38,11 @@ Commands: validate ``` -## redcap2reproschema.py Usage +## redcap2reproschema Usage +The `redcap2reproschema` function is designed to process a given REDCap CSV file and YAML configuration to generate the output in the reproschema format. ### Prerequisites -Before using the conversion script, ensure you have the following: +Before the conversion, ensure you have the following: 1. **GitHub Repository**: - [Create a GitHub repository](https://docs.github.com/en/repositories/creating-and-managing-repositories/creating-a-new-repository) named `reproschema` to store all your reproschema protocols. @@ -67,8 +68,25 @@ user_name: "john_doe" repo_name: "reproschema" protocol_description: "This protocol is for assessing cognitive skills." ``` +### Command-Line Usage -### Using the Script +The `redcap2reproschema` function has been integrated into a CLI tool; use the following command: +```bash +reproschema redcap2reproschema path/to/your_redcap_data_dic.csv path/to/your_redcap2rs.yaml +``` + +### Python Function Usage + +You can also use the `redcap2reproschema` function from the `reproschema-py` package in your Python code. + +```python +from reproschema import redcap2reproschema + +csv_path = "path-to/your_redcap_data_dic.csv" +yaml_path = "path-to/your_redcap2rs.yaml" + +redcap2reproschema(csv_path, yaml_path) +``` After configuring the YAML file: diff --git a/reproschema/cli.py b/reproschema/cli.py index 1a4385b..88655e5 100644 --- a/reproschema/cli.py +++ b/reproschema/cli.py @@ -1,5 +1,6 @@ import os import click +import yaml from . import get_logger, set_logger_level from . 
import __version__ @@ -100,11 +101,10 @@ def serve(port): @main.command() @click.argument('csv_path', type=click.Path(exists=True, dir_okay=False)) @click.argument('yaml_path', type=click.Path(exists=True, dir_okay=False)) -@click.option('--schema-url', default='https://raw.githubusercontent.com/ReproNim/reproschema/1.0.0-rc4/contexts/generic', show_default=True, help='URL of the schema context') -def redcap2reproschema(csv_path, yaml_path, schema_url): +def redcap2reproschema(csv_path, yaml_path): """ Convert REDCap CSV files to Reproschema format. - + Provide the path to the REDCap CSV file and the YAML configuration file. """ if not os.path.exists(csv_path): @@ -113,7 +113,7 @@ def redcap2reproschema(csv_path, yaml_path, schema_url): raise click.ClickException(f"YAML file not found at {yaml_path}") try: - # Call the redcap2reproschema main function with provided arguments - redcap2rs(csv_path, schema_url, yaml_path) + redcap2rs(csv_path, yaml_path) + click.echo("Converted REDCap data dictionary to Reproschema format.") except Exception as e: - raise click.ClickException(f"Error during conversion: {e}") \ No newline at end of file + raise click.ClickException(f"Error during conversion: {e}") diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 6ae975b..c08c6b1 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -6,19 +6,8 @@ import json import re import yaml -from collections import defaultdict from bs4 import BeautifulSoup - -def parse_arguments(): - parser = argparse.ArgumentParser( - description="Process REDCap data dictionary and reproschema protocol." 
- ) - parser.add_argument("csv_file", help="Path to the REDCap data dictionary CSV file.") - parser.add_argument("yaml_file", help="Path to the reproschema protocol YAML file.") - return parser.parse_args() - - def normalize_condition(condition_str): re_parentheses = re.compile(r"\(([0-9]*)\)") re_non_gt_lt_equal = re.compile(r"([^>|<])=") @@ -339,13 +328,28 @@ def process_csv( return datas, order, languages -def main( - csv_path, - schema_context_url, - protocol_name, - protocol_display_name, - protocol_description, -): +def redcap2reproschema(csv_path, yaml_path, schema_context_url=None): + """ + Convert a REDCap data dictionary to Reproschema format. + + :param csv_path: Path to the REDCap CSV file. + :param yaml_path: Path to the YAML configuration file. + :param schema_context_url: URL of the schema context. Optional. + """ + if schema_context_url is None: + schema_context_url = "https://raw.githubusercontent.com/ReproNim/reproschema/1.0.0-rc4/contexts/generic" + + # Read YAML configuration + try: + with open(yaml_path, "r") as f: + protocol = yaml.safe_load(f) + except FileNotFoundError: + raise FileNotFoundError(f"YAML file '{yaml_path}' not found.") + + protocol_name = protocol.get("protocol_name") + protocol_display_name = protocol.get("protocol_display_name") + protocol_description = protocol.get("protocol_description") + # Initialize variables schema_map = { "Variable / Field Name": "@id", # column A @@ -447,41 +451,29 @@ def main( protocol_visibility_obj, ) - -if __name__ == "__main__": - schema_context_url = "https://raw.githubusercontent.com/ReproNim/reproschema/1.0.0-rc4/contexts/generic" # we may also want to keep this schema version updated or in the yaml file - - args = parse_arguments() - - # Read the CSV file path - csv_path = args.csv_file +def main(): + import argparse + parser = argparse.ArgumentParser(description="Convert REDCap data dictionary to Reproschema format.") + parser.add_argument("csv_file", help="Path to the REDCap data 
dictionary CSV file.") + parser.add_argument("yaml_file", help="Path to the Reproschema protocol YAML file.") + args = parser.parse_args() # Read the YAML configuration - yaml_path = args.yaml_file try: - with open(yaml_path, "r") as f: + with open(args.yaml_file, "r") as f: protocol = yaml.safe_load(f) except FileNotFoundError: - print(f"Error: YAML file '{yaml_path}' not found.") - sys.exit(1) + raise FileNotFoundError(f"YAML file '{args.yaml_file}' not found.") - # Extract values from YAML file - protocol_name = protocol.get("protocol_name") - protocol_display_name = protocol.get("protocol_display_name") - protocol_description = protocol.get("protocol_description") repo_url = protocol.get("repo_url") - # git clone the repo + # Git operations subprocess.run(["git", "clone", repo_url]) - # set up branch and checkout subprocess.run(["git", "checkout", "main"]) - # cd to the repo os.chdir(repo_url.split("/")[-1]) - main( - csv_path, - schema_context_url, - protocol_name, - protocol_display_name, - protocol_description, - ) + # Call the main conversion function + redcap2reproschema(args.csv_file, args.yaml_file) + +if __name__ == "__main__": + main() \ No newline at end of file