Start of a next-status command

This implementation is the first to emit `CommandResult` type result items, i.e., dataclass instances rather than result dicts. It also uses uniform parameter validation, enabling a substantially simplified implementation (e.g., of the result renderer). The user-facing appearance remains (largely?) the same. TODO: more detailed analysis. The command options `untracked` and `recursive` now both take (optional) qualifying values, but also work without any value specification at the CLI. Closes datalad#586 (eventually)
mih · Jan 15, 2024 · dc740d0 · dc740d0
1 parent 7194e28
commit dc740d0
Show file tree

Hide file tree

Showing 7 changed files with 745 additions and 37 deletions.
diff --git a/datalad_next/__init__.py b/datalad_next/__init__.py
@@ -43,6 +43,10 @@
             'datalad_next.commands.ls_file_collection', 'LsFileCollection',
             'ls-file-collection',
         ),
+        (
+            'datalad_next.commands.status', 'Status',
+            'next-status', 'next_status',
+        ),
     ]
 )
 

diff --git a/datalad_next/commands/__init__.py b/datalad_next/commands/__init__.py
@@ -14,6 +14,7 @@
 
    CommandResult
    CommandResultStatus
+   status.StatusResult
 """
 from __future__ import annotations
 

diff --git a/datalad_next/commands/status.py b/datalad_next/commands/status.py
@@ -0,0 +1,298 @@
+"""
+"""
+from __future__ import annotations
+
+__docformat__ = 'restructuredtext'
+
+from dataclasses import dataclass
+from enum import Enum
+from logging import getLogger
+from pathlib import Path
+from typing import Generator
+
+from datalad_next.commands import (
+    CommandResult,
+    CommandResultStatus,
+    EnsureCommandParameterization,
+    ValidatedInterface,
+    Parameter,
+    ParameterConstraintContext,
+    build_doc,
+    datasetmethod,
+    eval_results,
+)
+from datalad_next.constraints import (
+    EnsureChoice,
+    WithDescription,
+)
+from datalad_next.constraints.dataset import EnsureDataset
+
+from datalad_next.iter_collections.gitdiff import (
+    GitDiffStatus,
+    GitTreeItemType,
+    GitContainerModificationType,
+)
+from datalad_next.iter_collections.gitstatus import (
+    iter_gitstatus,
+)
+from datalad_next.uis import (
+    ui_switcher as ui,
+    ansi_colors as ac,
+)
+
+# module-level logger, handed to every result item emitted by this module
+# NOTE(review): the logger name refers to `datalad.core.local.status`, not to
+# this module -- presumably to match the logger of the command that is to be
+# replaced; confirm
+lgr = getLogger('datalad.core.local.status')
+
+
+# TODO Could be `StrEnum`, came with PY3.11
+class StatusState(Enum):
+    """Enumeration of possible states of a status command result
+
+    The "state" is the condition of the dataset item being reported
+    on. The value of each member is the label that the result renderer
+    of the command prints for an item in that state.
+    """
+    # no `GitDiffStatus` is mapped to `clean` by `diffstatus2resultstate_map`
+    clean = 'clean'
+    added = 'added'
+    modified = 'modified'
+    deleted = 'deleted'
+    untracked = 'untracked'
+    unknown = 'unknown'
+
+
+# Color of the state label printed by the result renderer.
+# There is no entry for `StatusState.clean`. The renderer performs the lookup
+# with `.get()`, hence a state without an entry yields `None` as the color.
+STATE_COLOR_MAP = {
+    StatusState.added: ac.GREEN,
+    StatusState.modified: ac.RED,
+    StatusState.deleted: ac.RED,
+    StatusState.untracked: ac.RED,
+    StatusState.unknown: ac.YELLOW,
+}
+
+
+# Mapping of the fine-grained `GitDiffStatus` to the simplified `StatusState`
+# exposed via `StatusResult.state`. Several diff statuses collapse into a
+# single state: copies and renames are reported as `added`, type changes as
+# `modified`, and unmerged items as `unknown`.
+# `None` is not a key, and no diff status maps to `StatusState.clean`.
+diffstatus2resultstate_map = {
+    GitDiffStatus.addition: StatusState.added,
+    GitDiffStatus.copy: StatusState.added,
+    GitDiffStatus.deletion: StatusState.deleted,
+    GitDiffStatus.modification: StatusState.modified,
+    GitDiffStatus.rename: StatusState.added,
+    GitDiffStatus.typechange: StatusState.modified,
+    GitDiffStatus.unmerged: StatusState.unknown,
+    GitDiffStatus.unknown: StatusState.unknown,
+    GitDiffStatus.other: StatusState.untracked,
+}
+
+
+# see base class decorator comment for why this is commented out
+#@dataclass(kw_only=True)
+@dataclass
+class StatusResult(CommandResult):
+    """Result item of the status command, reporting on one dataset item
+
+    Supplements ``CommandResult`` with properties taken from the underlying
+    ``GitDiffItem``, and offers simplified views on them (``state``,
+    ``type``, ``prev_type``, ``type_src``).
+    """
+    # TODO any of the following properties are not actually optional
+    # we only have to declare them such for limitations of dataclasses
+    # prior PY3.10 (see kw_only comment in base class)
+
+    diff_state: GitDiffStatus | None = None
+    """The ``status`` of the underlying ``GitDiffItem``. It is named
+    "_state" to emphasize the conceptual similarity with the legacy
+    property 'state'
+    """
+    gittype: GitTreeItemType | None = None
+    """The ``gittype`` of the underlying ``GitDiffItem``."""
+    prev_gittype: GitTreeItemType | None = None
+    """The ``prev_gittype`` of the underlying ``GitDiffItem``."""
+    # variable-length tuple: an item can carry any number of qualifiers
+    modification_types: tuple[GitContainerModificationType, ...] | None = None
+    """Qualifiers for modification types of container-type
+    items (directories, submodules)."""
+
+    @property
+    def state(self) -> StatusState:
+        """A (more or less legacy) simplified representation of the subject
+        state. For a more accurate classification use the ``diff_state``
+        property.
+
+        Raises
+        ------
+        KeyError
+          If ``diff_state`` has no entry in ``diffstatus2resultstate_map``.
+          This includes a ``diff_state`` that was left at ``None``.
+        """
+        return diffstatus2resultstate_map[self.diff_state]
+
+    # the previous status-implementation did not report plain git-types
+    # we establish a getter to perform this kind of inference/mangling,
+    # when it is needed
+    @property
+    def type(self) -> str | None:
+        """Value of ``gittype``, or ``None`` when no ``gittype`` is set
+        """
+        # TODO this is just a placeholder
+        return self.gittype.value if self.gittype else None
+
+    # we need a setter for this `type`-override stunt
+    @type.setter
+    def type(self, value: GitTreeItemType | None) -> None:
+        # any assignment to `type` is redirected to `gittype`
+        self.gittype = value
+
+    @property
+    def prev_type(self) -> str | None:
+        """Value of ``prev_gittype``, or ``None`` when no ``prev_gittype``
+        is set
+        """
+        return self.prev_gittype.value if self.prev_gittype else None
+
+    @property
+    def type_src(self) -> str | None:
+        """Backward-compatibility adaptor, same as ``prev_type``"""
+        return self.prev_type
+
+
+# Accepted values of the `untracked`, `recursive`, and `eval_subdataset_state`
+# command parameters. Each tuple is used twice: as `choices` of the parameter
+# declaration of the command, and for the `EnsureChoice` constraint of the
+# parameter validator.
+opt_untracked_values = ('no', 'whole-dir', 'no-empty-dir', 'normal', 'all')
+opt_recursive_values = ('no', 'repository', 'submodules', 'datasets')
+opt_eval_subdataset_state_values = ('no', 'commit', 'full')
+
+
+class StatusParamValidator(EnsureCommandParameterization):
+    """Parameter validator of the status command
+
+    Constrains each parameter individually, and registers
+    ``normalize_options()`` as a joint constraint for ``untracked`` and
+    ``recursive``.
+    """
+    def __init__(self):
+        per_parameter = {
+            # if given, it must also exist
+            'dataset': EnsureDataset(installed=True),
+            'untracked': EnsureChoice(*opt_untracked_values),
+            'recursive': EnsureChoice(*opt_recursive_values),
+            'eval_subdataset_state': EnsureChoice(
+                *opt_eval_subdataset_state_values),
+        }
+        normalization = ParameterConstraintContext(
+            ('untracked', 'recursive'),
+            'option normalization',
+        )
+        super().__init__(
+            param_constraints=per_parameter,
+            validate_defaults=('dataset',),
+            joint_constraints={normalization: self.normalize_options},
+        )
+
+    def normalize_options(self, **kwargs):
+        """Replace option aliases with their canonical values
+
+        For ``untracked``, ``'no'`` becomes ``None`` and ``'normal'`` becomes
+        ``'no-empty-dir'``. For ``recursive``, ``'datasets'`` becomes
+        ``'submodules'``. Any other value is kept.
+
+        Returns
+        -------
+        dict
+          The given keyword arguments, with the replacements applied.
+        """
+        untracked = kwargs['untracked']
+        if untracked == 'no':
+            kwargs['untracked'] = None
+        elif untracked == 'normal':
+            kwargs['untracked'] = 'no-empty-dir'
+
+        if kwargs['recursive'] == 'datasets':
+            kwargs['recursive'] = 'submodules'
+        return kwargs
+
+
+@build_doc
+class Status(ValidatedInterface):
+    """This is a preview of an upcoming command implementation to replace
+    the DataLad ``status`` command.
+
+    For now expect anything here to change again.
+    """
+    # Interface.validate_args() will inspect this dict for the presence of a
+    # validator for particular parameters
+    _validator_ = StatusParamValidator()
+
+    # this is largely here for documentation and CLI parser building
+    _params_ = dict(
+        dataset=Parameter(
+            args=("-d", "--dataset"),
+            # NOTE(review): this text describes the dataset as a mere
+            # configuration source, whereas `__call__()` reports on the
+            # content found at the dataset location -- presumably copied
+            # from another command; confirm and reword
+            doc="""Dataset to be used as a configuration source. Beyond
+            reading configuration items, this command does not interact with
+            the dataset."""),
+        untracked=Parameter(
+            args=('--untracked',),
+            choices=opt_untracked_values,
+            doc="""Determine how untracked content is reported when
+            comparing a revision to the state of the working tree.
+            'no': no untracked content is reported;
+            'normal': untracked files and entire untracked directories are
+            reported as such;
+            'all': report individual files even in fully untracked directories.
+            In addition to these git-status modes,
+            'whole-dir' (like normal, but include empty directories), and
+            'no-empty-dir' (alias for 'normal') are understood."""),
+        recursive=Parameter(
+            args=('-r', '--recursive'),
+            # the option can be given without a value, in which case
+            # `const` is used
+            nargs='?',
+            const='datasets',
+            choices=opt_recursive_values,
+            # TODO placeholder, a description of the recursion modes is
+            # still missing
+            doc="some"),
+        eval_subdataset_state=Parameter(
+            args=("-e", "--eval-subdataset-state",),
+            choices=opt_eval_subdataset_state_values,
+            doc="""Evaluation of subdataset state (clean vs.
+            modified) can be expensive for deep dataset hierarchies
+            as subdataset have to be tested recursively for
+            uncommitted modifications. Setting this option to
+            'no' or 'commit' can substantially boost performance
+            by limiting what is being tested. With 'no' no state
+            is evaluated and subdataset result records do not
+            qualify the nature of a modifcation.
+            With 'commit' only a discrepancy of the HEAD commit
+            gitsha of a subdataset and the gitsha recorded in the
+            superdataset's record is evaluated.
+            With 'full' any other modification is considered
+            too."""),
+    )
+
+    _examples_ = [
+    ]
+
+    @staticmethod
+    @datasetmethod(name="next_status")
+    @eval_results
+    def __call__(
+        # TODO later
+        #path=None,
+        *,
+        dataset=None,
+        # TODO later
+        #annex=None,
+        untracked='normal',
+        recursive='repository',
+        # TODO this is needed for all recursion modes
+        # it would be necessary to traverse the full subtree
+        # underneath any reported submodule, in order to be able to report
+        # on the potential presence of untracked content
+        #
+        # for all recursion modes we would need to add support for comparing
+        # the HEAD commit of a submodule with the subproject commit in the
+        # parent, too
+        #
+        eval_subdataset_state='full',
+    ) -> Generator[StatusResult, None, None] | list[StatusResult]:
+        # Yields one `StatusResult` for each item reported by
+        # `iter_gitstatus()`.
+        # (deliberately a comment, not a docstring: presumably `build_doc`
+        # would incorporate a docstring into the user-facing documentation
+        # -- confirm before converting)
+        ds = dataset.ds
+        # without an explicitly given dataset, the status is determined for,
+        # and reported relative to, the current working directory
+        rootpath = Path.cwd() if dataset.original is None else ds.pathobj
+
+        for item in iter_gitstatus(
+            path=rootpath,
+            untracked=untracked,
+            recursive=recursive,
+            eval_submodule_state=eval_subdataset_state,
+        ):
+            yield StatusResult(
+                action='status',
+                status=CommandResultStatus.ok,
+                # fall back on the previous path of an item that has no
+                # current path
+                path=rootpath / (item.path or item.prev_path),
+                gittype=item.gittype,
+                prev_gittype=item.prev_gittype,
+                diff_state=item.status,
+                modification_types=item.modification_types,
+                refds=ds,
+                logger=lgr,
+            )
+
+    # declared static, because it takes no `self`; this matches `__call__()`
+    # and keeps the renderer callable via an instance, too
+    @staticmethod
+    def custom_result_renderer(res, **kwargs):
+        """Print a one-line report for a single ``StatusResult``
+
+        The line comprises the right-aligned, colored state label, the path
+        relative to the root path of the command execution, and (if
+        available) the item type and its modification qualifiers.
+
+        Parameters
+        ----------
+        res: StatusResult
+          Result item to be rendered.
+        **kwargs
+          Parameters of the command call. Only ``dataset`` is used.
+        """
+        # we are guaranteed to have dataset-arg info through uniform
+        # parameter validation
+        dsarg = kwargs['dataset']
+        rootpath = Path.cwd() if dsarg.original is None else dsarg.ds.pathobj
+        # because we can always determine the root path of the command
+        # execution environment, we can report meaningful relative paths
+        # unconditionally
+        path = res.path.relative_to(rootpath)
+        # collapse item type information across current and previous states
+        type_ = res.type or res.prev_type or ''
+        # state labels are right-aligned to the width of the longest one
+        max_len = len('untracked')
+        # resolve the state once, rather than with every use
+        state = res.state
+        label = state.value
+        # message format is same as for previous command implementation
+        ui.message('{fill}{state}: {path}{type_}{annot}'.format(
+            fill=' ' * max(0, max_len - len(label)),
+            # a state without an entry in the color map yields `None`
+            state=ac.color_word(label, STATE_COLOR_MAP.get(state)),
+            path=path,
+            type_=' ({})'.format(ac.color_word(type_, ac.MAGENTA))
+            if type_ else '',
+            annot=f' [{", ".join(q.value for q in res.modification_types)}]'
+            if res.modification_types else '',
+        ))