From e43004a887fc83dafccc5613e4bc204f906a4b05 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Thu, 19 Oct 2023 22:33:31 +0200 Subject: [PATCH] Store run-record path specs POSIX and report platform paths There is limited support for reading and acting on old run-records that have paths stored in platform conventions. Detection works when the path matching an existing item on the file system. Paths are always stored in POSIX notation, whenever they are relative. --- datalad_next/patches/run.py | 111 +++++++++++++++++++++++++++++++++++- 1 file changed, 110 insertions(+), 1 deletion(-) diff --git a/datalad_next/patches/run.py b/datalad_next/patches/run.py index b7672d7a..6348c1bc 100644 --- a/datalad_next/patches/run.py +++ b/datalad_next/patches/run.py @@ -1,4 +1,10 @@ -"""Enhance ``run()`` placeholder substitutions to honor configuration defaults +"""Enhance datalad-core's ``run()`` + +Portable path handling logic for run-records +-------------------------------------------- + +Placeholder substitutions to honor configuration defaults +--------------------------------------------------------- Previously, ``run()`` would not recognize configuration defaults for placeholder substitution. This means that any placeholders globally declared in @@ -19,6 +25,12 @@ """ from itertools import filterfalse +from os.path import lexists +from pathlib import ( + PurePath, + PureWindowsPath, + PurePosixPath, +) import sys from datalad.core.local.run import ( @@ -26,7 +38,10 @@ SequenceFormatter, normalize_command, quote_cmdlinearg, + _create_record as _orig_create_record, ) +from datalad.distribution.dataset import Dataset +from datalad.local.rerun import get_run_info as _orig_get_run_info from datalad.interface.common_cfg import definitions as cfg_defs from datalad.support.constraints import EnsureStr from datalad.support.extensions import register_config @@ -34,6 +49,95 @@ from . import apply_patch +# Deals with https://github.com/datalad/datalad/issues/7512 +def _create_record(run_info, sidecar_flag, ds): + # convert any input/output specification to a POSIX path + for k in ('inputs', 'outputs'): + if k not in run_info: + continue + run_info[k] = [_get_posix_relpath_for_runrecord(p) + for p in run_info[k]] + + return _orig_create_record(run_info, sidecar_flag, ds) + + +def _get_posix_relpath_for_runrecord(path): + p = PurePath(path) + if p.is_absolute(): + # there is no point in converting an absolute path + # to a different platform convention. + # return as-is + return path + + return str(PurePosixPath(p)) + + +# Deals with https://github.com/datalad/datalad/issues/7512 +def get_run_info(dset, message): + msg, run_info = _orig_get_run_info(dset, message) + if run_info is None: + # nothing to process, return as-is + return msg, run_info + + for k in ('inputs', 'outputs'): + if k not in run_info: + continue + run_info[k] = [_get_platform_path_from_runrecord(p, dset) + for p in run_info[k]] + return msg, run_info + + +def _get_platform_path_from_runrecord(path: str, ds: Dataset) -> PurePath: + """Helper to standardize run_info path handling + + Previously, run-records would contain platform-paths (e.g., windows paths + when added on windows, POSIX paths elsewhere). This made cross-platform + rerun impossible out-of-the box, but it also means that such dataset are + out there in unknown numbers. + + This helper inspects any input/output path reported by get_run_info() + and tries to ensure that it matches platform conventions. + + Parameters + ---------- + path: str + A str-path from an input/output specification + ds: Dataset + This dataset's base path is used for existence testing for + convention determination. + + Returns + ------- + str + """ + # we only need to act differently, when an incoming path is + # windows. This is not possible to say with 100% confidence, + # because a POSIX path can also contain a backslash. We support + # a few standard cases where we CAN tell + try: + pathobj = None + if '\\' not in path: + # no windows pathsep, no problem + pathobj = PurePosixPath(path) + # let's assume it is windows for a moment + elif lexists(str(ds.pathobj / PureWindowsPath(path))): + # if there is something on the filesystem for this path, + # we can be reasonably sure that this is indeed a windows + # path. This won't catch everything, but better than nothing + pathobj = PureWindowsPath(path) + else: + # if we get here, we have no idea, and no means to verify + # further hypotheses -- go with the POSIX assumption + # and hope for the best + pathobj = PurePosixPath(path) + assert pathobj is not None + except Exception: + return path + + # we report in platform-conventions + return str(PurePath(pathobj)) + + # This function is taken from datalad-core@a96c51c0b2794b2a2b4432ec7bd51f260cb91a37 # datalad/core/local/run.py # The change has been proposed in https://github.com/datalad/datalad/pull/7509 @@ -80,6 +184,11 @@ def not_subst(x): apply_patch( 'datalad.core.local.run', None, 'format_command', format_command) +apply_patch( + 'datalad.core.local.run', None, '_create_record', _create_record) +apply_patch( + 'datalad.local.rerun', None, 'get_run_info', get_run_info) + register_config( 'datalad.run.substitutions.python', 'Substitution for {python} placeholder',