Skip to content

Commit

Permalink
ENH: updating reproschema commands to the new pydantic model (#36)
Browse files Browse the repository at this point in the history
* add print for testing

* update clear_header

* remove print

* fix order and other errors

* change ui yesno to radio

* fix typo

* update context, field->item, fix isVis

* remove useless due to failed validation

* remove visibility at the item level & remove matrixInfo

* fix choice

* remove identifier

* updating validate command to the new pydantic model

* updating/fixing the tests; updating the model to use CreativeWork; changes in formating

* fix conversion tests

* remove test output

* change test output directory

* final improvements on tests

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* model version after adding Thing class

* updating model after removing CreativeWork and ImageUrl

* adding tests to initialize the model classes

* fixing load_file; adding write_obj_jsonld function and expanding test_schema

* changing redcap2reproschema to use new pydantic classes; some small changes to the pydantic model

* changing name from string to langstring with en as language

* fixing jsonld files

* Adding option to return compact schema to load_file

* fixing the protocol jsonld file

* changing reproschema2redcap to use the new model

* adding contectfile to write_obj_jsonld function and improving test; improving compact option for load_file

* fixing reproschema2redcap and tests

* removing file with the context and fixing references to the context_url (for now the link from the ref/linkm branch)

* updating the reproschema2redcap to work for activity/items from urls

* improving error message for file_load and validate; checking the suffix of the file before treating it as jsonld

* fixing identify_model_class, so Item and Field are treated the same

* fixing reproschema2redcap so it reads responseOptions from another file

* rewriting parts of redcap2reproschema, fixing some bugs[wip]

* fixing compute: removing isvis condition

* fixing process_csv so it doesn't go multiple times through the same condition

* changes to input and value mapping (mapping explicitly or raising errors if not found); fixing choices and adding slider; adding sql to compute types (this does not work properly right now); adding many comments

* adding output for redcap2reproschema command; removing argparse

* model without decimal; revert changes to valueType in the model

* adding migrate command

* fixing multiple issues with redcap2rp and rp2redcap: adding compute, fixing preamble (can be either activity level or item level)

* WIP: adding test to test rp2redcap and redcap2repo using NIMH example

* fixing paths in the tests, should run now

* ignore .DS_Store files in validation

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: yibeichan <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
3 people authored Jun 15, 2024
1 parent 89c6337 commit 0988c20
Show file tree
Hide file tree
Showing 52 changed files with 2,280 additions and 1,238 deletions.
39 changes: 33 additions & 6 deletions reproschema/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
from . import get_logger, set_logger_level
from . import __version__
from .redcap2reproschema import redcap2reproschema as redcap2rs
from .reproschema2redcap import main as rs2redcap
from .reproschema2redcap import reproschema2redcap as rs2redcap
from .migrate import migrate2newschema

lgr = get_logger()

Expand Down Expand Up @@ -42,14 +43,33 @@ def main(log_level):


@main.command()
@click.option("--shapefile", default=None, type=click.Path(exists=True, dir_okay=False))
@click.argument("path", nargs=1, type=str)
def validate(shapefile, path):
def validate(path):
if not (path.startswith("http") or os.path.exists(path)):
raise ValueError(f"{path} must be a URL or an existing file or directory")
from .validate import validate

validate(shapefile, path)
result = validate(path)
if result:
click.echo("Validation successful")


@main.command()
@click.argument("path", nargs=1, type=click.Path(exists=True, dir_okay=True))
@click.option("--inplace", is_flag=True, help="Changing file in place")
@click.option(
    "--fixed-path",
    type=click.Path(dir_okay=True, writable=True, resolve_path=True),
    help="Path to the fixed file/directory; if not provided, the suffix 'after_migration' is used",
)
def migrate(path, inplace, fixed_path):
    """Migrate a reproschema file or directory to the new schema version.

    PATH must be an existing file or directory (URLs are rejected by the
    click.Path check before this body runs, but the guard below mirrors the
    other commands' validation).
    """
    if not (path.startswith("http") or os.path.exists(path)):
        raise ValueError(f"{path} must be a URL or an existing file or directory")
    if fixed_path and inplace:
        # The two options are mutually exclusive: --inplace overwrites the
        # original, --fixed-path writes the result elsewhere.
        # BUG FIX: the original message said "Either inplace or fixed_path has
        # to be provided", which is the opposite of the condition being raised.
        raise Exception("Only one of --inplace or --fixed-path may be provided.")
    new_path = migrate2newschema(path, inplace=inplace, fixed_path=fixed_path)
    if new_path:
        click.echo(f"File/Directory after migration {new_path}")


@main.command()
Expand Down Expand Up @@ -103,12 +123,19 @@ def serve(port):
@main.command()
@click.argument("csv_path", type=click.Path(exists=True, dir_okay=False))
@click.argument("yaml_path", type=click.Path(exists=True, dir_okay=False))
def redcap2reproschema(csv_path, yaml_path):
@click.option(
"--output-path",
type=click.Path(dir_okay=True, writable=True, resolve_path=True),
default=".",
show_default=True,
help="Path to the output directory, defaults to the current directory.",
)
def redcap2reproschema(csv_path, yaml_path, output_path):
"""
Convert REDCap CSV files to Reproschema format.
"""
try:
redcap2rs(csv_path, yaml_path)
redcap2rs(csv_path, yaml_path, output_path)
click.echo("Converted REDCap data dictionary to Reproschema format.")
except Exception as e:
raise click.ClickException(f"Error during conversion: {e}")
Expand Down
163 changes: 116 additions & 47 deletions reproschema/jsonldutils.py
Original file line number Diff line number Diff line change
@@ -1,77 +1,146 @@
from pyld import jsonld
from pyshacl import validate as shacl_validate
import json
import os
from .utils import start_server, stop_server, lgr
from pathlib import Path
from copy import deepcopy
import requests
from urllib.parse import urlparse
from .utils import start_server, stop_server, lgr, fixing_old_schema, CONTEXTFILE_URL
from .models import (
Item,
Activity,
Protocol,
ResponseOption,
ResponseActivity,
Response,
identify_model_class,
)


def load_file(path_or_url, started=False, http_kwargs={}):
try:
def _is_url(path):
"""
Determine whether the given path is a URL.
"""
parsed = urlparse(str(path))
return parsed.scheme in ("http", "https", "ftp", "ftps")


def _is_file(path):
"""
Determine whether the given path is a valid file path.
"""
return os.path.isfile(path)


def _fetch_jsonld_context(url):
    # Download a remote JSON-LD context document and return it parsed
    # (requests.Response.json() result, typically a dict).
    # NOTE(review): no timeout and no raise_for_status here — a hung server
    # will block indefinitely and an HTTP error page will raise a JSON decode
    # error instead of an HTTP error; consider requests.get(url, timeout=...)
    # followed by response.raise_for_status().
    response = requests.get(url)
    return response.json()


def load_file(
path_or_url,
started=False,
http_kwargs=None,
compact=False,
compact_context=None,
fixoldschema=False,
):
"""Load a file or URL and return the expanded JSON-LD data."""
path_or_url = str(path_or_url)
if http_kwargs is None:
http_kwargs = {}
if _is_url(path_or_url):
data = jsonld.expand(path_or_url)
if len(data) == 1:
if "@id" not in data[0]:
if "@id" not in data[0] and "id" not in data[0]:
data[0]["@id"] = path_or_url
except jsonld.JsonLdError as e:
if 'only "http" and "https"' in str(e):
lgr.debug("Reloading with local server")
root = os.path.dirname(path_or_url)
elif _is_file(path_or_url):
lgr.debug("Reloading with local server")
root = os.path.dirname(path_or_url)
if not started:
stop, port = start_server(**http_kwargs)
else:
if "port" not in http_kwargs:
raise KeyError("port key missing in http_kwargs")
port = http_kwargs["port"]
base_url = f"http://localhost:{port}/"
if root:
base_url += f"{root}/"
with open(path_or_url) as json_file:
try:
data = json.load(json_file)
except json.JSONDecodeError as e:
raise json.JSONDecodeError(
f"Error parsing JSON file {json_file}: {e.msg}", e.doc, e.pos
) from e
try:
data = jsonld.expand(data, options={"base": base_url})
except:
raise
finally:
if not started:
stop, port = start_server(**http_kwargs)
stop_server(stop)
if len(data) == 1:
if "@id" not in data[0] and "id" not in data[0]:
data[0]["@id"] = base_url + os.path.basename(path_or_url)
else:
raise Exception(f"{path_or_url} is not a valid URL or file path")

if isinstance(data, list) and len(data) == 1:
data = data[0]

if fixoldschema:
data = fixing_old_schema(data, copy_data=True)
if compact:
if compact_context:
if _is_file(compact_context):
with open(compact_context) as fp:
context = json.load(fp)
elif _is_url(compact_context):
context = _fetch_jsonld_context(compact_context)
else:
if "port" not in http_kwargs:
raise KeyError("port key missing in http_kwargs")
port = http_kwargs["port"]
base_url = f"http://localhost:{port}/"
if root:
base_url += f"{root}/"
with open(path_or_url) as json_file:
data = json.load(json_file)
try:
data = jsonld.expand(data, options={"base": base_url})
except:
raise
finally:
if not started:
stop_server(stop)
if len(data) == 1:
if "@id" not in data[0]:
data[0]["@id"] = base_url + os.path.basename(path_or_url)
raise Exception(
f"compact_context has tobe a file or url, but {compact_context} provided"
)
if _is_file(path_or_url):
data = jsonld.compact(data, ctx=context, options={"base": base_url})
else:
raise
data = jsonld.compact(data, ctx=context)

return data


def validate_data(data, shape_file_path):
"""Validate an expanded jsonld document against a shape.
def validate_data(data):
"""Validate an expanded jsonld document against the pydantic model.
Parameters
----------
data : dict
Python dictionary containing JSONLD object
shape_file_path : str
SHACL file for the document
Returns
-------
conforms: bool
Whether the document is conformant with the shape
v_text: str
Validation information returned by PySHACL
Validation errors if any returned by pydantic
"""
kwargs = {"algorithm": "URDNA2015", "format": "application/n-quads"}
normalized = jsonld.normalize(data, kwargs)
data_file_format = "nquads"
shape_file_format = "turtle"
conforms, v_graph, v_text = shacl_validate(
normalized,
shacl_graph=shape_file_path,
data_graph_format=data_file_format,
shacl_graph_format=shape_file_format,
inference="rdfs",
debug=False,
serialize_report_graph=True,
)
# do we need it?
# kwargs = {"algorithm": "URDNA2015", "format": "application/n-quads"}
# normalized = jsonld.normalize(data, kwargs)
obj_type = identify_model_class(data["@type"][0])
data_fixed = [fixing_old_schema(data, copy_data=True)]
context = _fetch_jsonld_context(CONTEXTFILE_URL)
data_fixed_comp = jsonld.compact(data_fixed, context)
del data_fixed_comp["@context"]
conforms = False
v_text = ""
try:
obj_type(**data_fixed_comp)
conforms = True
except Exception as e:
v_text = str(e)
return conforms, v_text


Expand Down
44 changes: 44 additions & 0 deletions reproschema/migrate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import json, os
import shutil
from pathlib import Path

from .jsonldutils import load_file
from .utils import fixing_old_schema


def migrate2newschema(path, inplace=False, fixed_path=None):
    """Migrate a jsonld file, or every file under a directory, to the new schema.

    Parameters
    ----------
    path : str or Path
        Existing file or directory to migrate.
    inplace : bool, optional
        If True, overwrite the original file(s) in place.
    fixed_path : str or Path, optional
        Destination for the migrated copy (ignored when ``inplace`` is True;
        for a directory it must not already exist, since it is created by
        ``shutil.copytree``).

    Returns
    -------
    Path or str
        Location of the migrated file or directory.  If neither ``inplace``
        nor ``fixed_path`` is given, a sibling path suffixed with
        ``_after_migration`` is used.
    """
    path = Path(path).resolve()
    if path.is_file():
        print(f"migration of file: {path}")
        new_path = migrate2newschema_file(path, inplace, fixed_path)
    else:  # path is a directory: copy it (unless inplace), then fix each file
        if inplace:
            new_path = path
        elif fixed_path:
            new_path = Path(fixed_path).resolve()
            shutil.copytree(path, new_path)
        else:
            new_path = path.parent / f"{path.name}_after_migration"
            shutil.copytree(path, new_path)
        # fixing all files in new_path; the copies are edited in place so the
        # originals are untouched (unless inplace was requested)
        all_files = Path(new_path).rglob("*")
        for file in all_files:
            if file.is_file():
                migrate2newschema_file(jsonld_path=file, inplace=True)
    return new_path


def migrate2newschema_file(jsonld_path, inplace=False, fixed_path=None):
    """Migrate a single jsonld file to the new schema and write the result.

    Parameters
    ----------
    jsonld_path : str or Path
        Path of the jsonld file to fix.
    inplace : bool, optional
        If True, overwrite ``jsonld_path`` itself.
    fixed_path : str or Path, optional
        Explicit output path; used only when ``inplace`` is False.

    Returns
    -------
    str or Path
        The path the migrated document was written to.  Defaults to the input
        path with an ``_after_migration`` suffix before the extension.
    """
    print(f"Fixing {jsonld_path}")
    data = load_file(jsonld_path, started=False)
    # fixing_old_schema returns a single object; the output file keeps the
    # original list-wrapped layout
    data_fixed = [fixing_old_schema(data, copy_data=True)]
    if inplace:
        new_filename = jsonld_path
    elif fixed_path:
        # BUG FIX: original read `elif fixedjsonld_path:` — an undefined name
        # that raised NameError whenever inplace was False.
        new_filename = fixed_path
    else:
        root, ext = os.path.splitext(jsonld_path)
        new_filename = f"{root}_after_migration{ext}"
    with open(new_filename, "w") as f:
        json.dump(data_fixed, f, indent=4)
    return new_filename
5 changes: 2 additions & 3 deletions reproschema/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
from .protocol import Protocol
from .activity import Activity
from .item import Item
from .model import Activity, Item, Protocol, ResponseOption, ResponseActivity, Response
from .utils import write_obj_jsonld, identify_model_class
66 changes: 0 additions & 66 deletions reproschema/models/activity.py

This file was deleted.

Loading

0 comments on commit 0988c20

Please sign in to comment.