Skip to content

Commit

Permalink
Start of a next-status command
Browse files Browse the repository at this point in the history
This implementation is the first to emit `CommandResult` type result
items, i.e., dataclass instances rather than result dicts.

It also uses uniform parameter validation, enabling substantially
simplified implementation (e.g., of the result renderer).

The user-facing appearance remains (largely?) the same.

TODO more detailed analysis

The command options `untracked` and `recursive` now both take (optional)
qualifying values, but also work with any value specification at the
CLI.

Closes datalad#586 (eventually)
  • Loading branch information
mih committed Jan 17, 2024
1 parent 588e9dd commit 9cb16ed
Show file tree
Hide file tree
Showing 8 changed files with 1,091 additions and 59 deletions.
4 changes: 4 additions & 0 deletions datalad_next/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@
'datalad_next.commands.ls_file_collection', 'LsFileCollection',
'ls-file-collection',
),
(
'datalad_next.commands.status', 'Status',
'next-status', 'next_status',
),
]
)

Expand Down
1 change: 1 addition & 0 deletions datalad_next/commands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
CommandResult
CommandResultStatus
status.StatusResult
"""
from __future__ import annotations

Expand Down
306 changes: 306 additions & 0 deletions datalad_next/commands/status.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,306 @@
"""
"""
from __future__ import annotations

__docformat__ = 'restructuredtext'

from dataclasses import dataclass
from enum import Enum
from logging import getLogger
from pathlib import Path
from typing import Generator

from datalad_next.commands import (
CommandResult,
CommandResultStatus,
EnsureCommandParameterization,
ValidatedInterface,
Parameter,
ParameterConstraintContext,
build_doc,
datasetmethod,
eval_results,
)
from datalad_next.constraints import (
EnsureChoice,
WithDescription,
)
from datalad_next.constraints.dataset import EnsureDataset

from datalad_next.iter_collections.gitdiff import (
GitDiffStatus,
GitTreeItemType,
GitContainerModificationType,
)
from datalad_next.iter_collections.gitstatus import (
iter_gitstatus,
)
from datalad_next.uis import (
ui_switcher as ui,
ansi_colors as ac,
)

# Module-level logger; it is handed to every `StatusResult` emitted by
# `Status.__call__()`.
# NOTE(review): the name refers to the `datalad.core.local.status`
# namespace rather than to this module -- confirm that sharing the logger
# name of the legacy command is intended.
lgr = getLogger('datalad.core.local.status')


# TODO Could be `StrEnum`, came with PY3.11
class StatusState(Enum):
    """Simplified state labels for status command results

    A "state" names the condition of the dataset item that a result
    reports on. Each member's value equals its name; the value is the
    label printed by the result renderer.
    """
    # label reserved for items without a reportable change
    clean = 'clean'
    # labels for the individual kinds of change
    added = 'added'
    modified = 'modified'
    deleted = 'deleted'
    untracked = 'untracked'
    # fallback label when no more specific state applies
    unknown = 'unknown'


# Color used by `Status.custom_result_renderer()` for the state label of a
# result. `StatusState.clean` deliberately has no entry here; the renderer
# looks colors up with `.get()`, so a missing state yields `None` rather
# than an exception.
STATE_COLOR_MAP = {
    StatusState.added: ac.GREEN,
    StatusState.modified: ac.RED,
    StatusState.deleted: ac.RED,
    StatusState.untracked: ac.RED,
    StatusState.unknown: ac.YELLOW,
}


# Translation of the `GitDiffStatus` reported for an item into the
# simplified `StatusState`; used by the `StatusResult.state` property.
# Several diff statuses collapse into one state: copies and renames are
# reported as `added`, type changes as `modified`, unmerged items as
# `unknown`, and `other` as `untracked`.
diffstatus2resultstate_map = {
    GitDiffStatus.addition: StatusState.added,
    GitDiffStatus.copy: StatusState.added,
    GitDiffStatus.deletion: StatusState.deleted,
    GitDiffStatus.modification: StatusState.modified,
    GitDiffStatus.rename: StatusState.added,
    GitDiffStatus.typechange: StatusState.modified,
    GitDiffStatus.unmerged: StatusState.unknown,
    GitDiffStatus.unknown: StatusState.unknown,
    GitDiffStatus.other: StatusState.untracked,
}


# see base class decorator comment for why this is commented out
#@dataclass(kw_only=True)
@dataclass
class StatusResult(CommandResult):
    """Result item reported by the ``next-status`` command

    Extends ``CommandResult`` with the properties of the ``GitDiffItem``
    a result is based on. The ``state``, ``type``, ``prev_type`` and
    ``type_src`` properties offer simplified, legacy-style views on
    these properties.
    """
    # TODO the following properties are not actually optional;
    # we only have to declare them as such due to limitations of
    # dataclasses prior PY3.10 (see the `kw_only` comment in the base
    # class)

    diff_state: GitDiffStatus | None = None
    """The ``status`` of the underlying ``GitDiffItem``. It is named
    "_state" to emphasize the conceptual similarity with the legacy
    property 'state'
    """
    gittype: GitTreeItemType | None = None
    """The ``gittype`` of the underlying ``GitDiffItem``."""
    prev_gittype: GitTreeItemType | None = None
    """The ``prev_gittype`` of the underlying ``GitDiffItem``."""
    # any number of qualifiers can be given, hence a variable-length tuple
    modification_types: tuple[GitContainerModificationType, ...] | None = None
    """Qualifiers for modification types of container-type
    items (directories, submodules)."""

    @property
    def state(self) -> StatusState:
        """A (more or less legacy) simplified representation of the subject
        state. For a more accurate classification use the ``diff_state``
        property.

        Raises ``KeyError`` when ``diff_state`` has no entry in
        ``diffstatus2resultstate_map``. This includes the declared
        default value ``None``.
        """
        return diffstatus2resultstate_map[self.diff_state]

    # the previous status-implementation did not report plain git-types
    # we establish a getter to perform this kind of inference/mangling,
    # when it is needed
    @property
    def type(self) -> str | None:
        """Type label of the item: the ``value`` of ``gittype``, or
        ``None`` when ``gittype`` is not set.
        """
        # TODO this is just a placeholder
        return self.gittype.value if self.gittype else None

    # we need a setter for this `type`-override stunt
    # NOTE(review): presumably the base class assigns a `type` attribute
    # on initialization -- confirm against `CommandResult`. The given
    # value is stored verbatim as `gittype`.
    @type.setter
    def type(self, value):
        self.gittype = value

    @property
    def prev_type(self) -> str | None:
        """Type label of the item's previous state: the ``value`` of
        ``prev_gittype``, or ``None`` when ``prev_gittype`` is not set.
        """
        return self.prev_gittype.value if self.prev_gittype else None

    @property
    def type_src(self) -> str | None:
        """Backward-compatibility adaptor; same as ``prev_type``"""
        return self.prev_type


# Accepted values of the `untracked`, `recursive`, and
# `eval_subdataset_state` command options. Each tuple is used twice: as
# `EnsureChoice` constraint in `StatusParamValidator`, and as `choices`
# of the corresponding `Parameter` declaration in `Status._params_`.
opt_untracked_values = ('no', 'whole-dir', 'no-empty-dir', 'normal', 'all')
opt_recursive_values = ('no', 'repository', 'datasets', 'mono')
opt_eval_subdataset_state_values = ('no', 'commit', 'full')


class StatusParamValidator(EnsureCommandParameterization):
    """Parameter validation for the ``Status`` command

    Each option is constrained to its set of accepted values. In addition,
    a joint constraint on ``untracked`` and ``recursive`` replaces some of
    the accepted labels with different values (see
    ``normalize_options()``).
    """
    def __init__(self):
        per_param = {
            # if given, it must also exist
            'dataset': EnsureDataset(installed=True),
            'untracked': EnsureChoice(*opt_untracked_values),
            'recursive': EnsureChoice(*opt_recursive_values),
            'eval_subdataset_state': EnsureChoice(
                *opt_eval_subdataset_state_values),
        }
        normalization_context = ParameterConstraintContext(
            ('untracked', 'recursive'),
            'option normalization',
        )
        super().__init__(
            param_constraints=per_param,
            validate_defaults=('dataset',),
            joint_constraints={
                normalization_context: self.normalize_options,
            },
        )

    def normalize_options(self, **kwargs):
        """Replace option labels with their normalized values

        ``untracked``: ``'no'`` becomes ``None``, and ``'normal'`` becomes
        ``'no-empty-dir'``. ``recursive``: ``'datasets'`` becomes
        ``'submodules'``, and ``'mono'`` becomes ``'monolithic'``. Any
        other value is passed on unchanged.

        Returns the (updated) keyword arguments as a ``dict``.
        """
        untracked_mode = kwargs['untracked']
        if untracked_mode == 'no':
            kwargs['untracked'] = None
        elif untracked_mode == 'normal':
            kwargs['untracked'] = 'no-empty-dir'

        recursion_mode = kwargs['recursive']
        if recursion_mode == 'datasets':
            kwargs['recursive'] = 'submodules'
        elif recursion_mode == 'mono':
            kwargs['recursive'] = 'monolithic'

        return kwargs


@build_doc
class Status(ValidatedInterface):
    """This is a preview of an upcoming command implementation to replace
    the DataLad ``status`` command.
    For now expect anything here to change again.
    """
    # Interface.validate_args() will inspect this dict for the presence of a
    # validator for particular parameters
    _validator_ = StatusParamValidator()

    # this is largely here for documentation and CLI parser building
    _params_ = dict(
        # NOTE(review): this description reads like it was carried over
        # from another command. `__call__()` below uses the dataset
        # location as the root path of the status query -- confirm and
        # update the text.
        dataset=Parameter(
            args=("-d", "--dataset"),
            doc=(
                "Dataset to be used as a configuration source. Beyond\n"
                "reading configuration items, this command does not "
                "interact with\n"
                "the dataset."
            )),
        untracked=Parameter(
            args=('--untracked',),
            choices=opt_untracked_values,
            doc=(
                "Determine how untracked content is reported when\n"
                "comparing a revision to the state of the working tree.\n"
                "'no': no untracked content is reported;\n"
                "'normal': untracked files and entire untracked "
                "directories are\n"
                "reported as such;\n"
                "'all': report individual files even in fully untracked "
                "directories.\n"
                "In addition to these git-status modes,\n"
                "'whole-dir' (like normal, but include empty "
                "directories), and\n"
                "'no-empty-dir' (alias for 'normal') are understood."
            )),
        recursive=Parameter(
            args=('-r', '--recursive'),
            nargs='?',
            const='datasets',
            choices=opt_recursive_values,
            # TODO placeholder text, a real description of the recursion
            # modes is still missing
            doc="some"),
        eval_subdataset_state=Parameter(
            args=("-e", "--eval-subdataset-state",),
            choices=opt_eval_subdataset_state_values,
            doc=(
                "Evaluation of subdataset state (clean vs.\n"
                "modified) can be expensive for deep dataset hierarchies\n"
                "as subdataset have to be tested recursively for\n"
                "uncommitted modifications. Setting this option to\n"
                "'no' or 'commit' can substantially boost performance\n"
                "by limiting what is being tested. With 'no' no state\n"
                "is evaluated and subdataset result records do not\n"
                "qualify the nature of a modification.\n"
                "With 'commit' only a discrepancy of the HEAD commit\n"
                "gitsha of a subdataset and the gitsha recorded in the\n"
                "superdataset's record is evaluated.\n"
                "With 'full' any other modification is considered\n"
                "too."
            )),
    )

    _examples_ = []

    @staticmethod
    @datasetmethod(name="next_status")
    @eval_results
    def __call__(
            # TODO later
            #path=None,
            *,
            dataset=None,
            # TODO later
            #annex=None,
            untracked='normal',
            recursive='repository',
            # TODO this is needed for all recursion modes
            # it would be necessary to traverse the full subtree
            # underneath any reported submodule, in order to be able to report
            # on the potential presence of untracked content
            #
            # for all recursion modes we would need to add support for comparing
            # the HEAD commit of a submodule with the subproject commit in the
            # parent, too
            #
            eval_subdataset_state='full',
    ) -> Generator[StatusResult, None, None] | list[StatusResult]:
        # `dataset` is validated even when left at its default (see
        # `validate_defaults` in `StatusParamValidator`), hence it is
        # not `None` at this point
        ds = dataset.ds
        # without an explicitly given dataset, paths are reported relative
        # to the current working directory
        rootpath = Path.cwd() if dataset.original is None else ds.pathobj

        for item in iter_gitstatus(
                path=rootpath,
                untracked=untracked,
                recursive=recursive,
                eval_submodule_state=eval_subdataset_state,
        ):
            yield StatusResult(
                action='status',
                status=CommandResultStatus.ok,
                # fall back on the previous path when the item has no
                # current one
                path=rootpath / (item.path or item.prev_path),
                gittype=item.gittype,
                prev_gittype=item.prev_gittype,
                diff_state=item.status,
                modification_types=item.modification_types,
                refds=ds,
                logger=lgr,
            )

    # declared as a static method, like `custom_result_summary_renderer()`
    # below: it takes no instance and is given the result item as first
    # argument
    @staticmethod
    def custom_result_renderer(res, **kwargs):
        """Print a one-line report for the status result ``res``

        ``kwargs`` must contain the validated ``dataset`` argument of the
        command call; it determines the root that the reported path is
        made relative to.
        """
        # we are guaranteed to have dataset-arg info through uniform
        # parameter validation
        dsarg = kwargs['dataset']
        rootpath = Path.cwd() if dsarg.original is None else dsarg.ds.pathobj
        # because we can always determine the root path of the command
        # execution environment, we can report meaningful relative paths
        # unconditionally
        path = res.path.relative_to(rootpath)
        # collapse item type information across current and previous states
        type_ = res.type or res.prev_type or ''
        max_len = len('untracked')
        state = res.state.value
        # right-align the state label; the padding is computed from the
        # plain label, before any color codes are added
        fill = ' ' * max(0, max_len - len(state))
        colored_state = ac.color_word(state, STATE_COLOR_MAP.get(res.state))
        type_annot = \
            ' ({})'.format(ac.color_word(type_, ac.MAGENTA)) if type_ else ''
        mod_annot = \
            f' [{", ".join(q.value for q in res.modification_types)}]' \
            if res.modification_types else ''
        # message format is same as for previous command implementation
        ui.message(f'{fill}{colored_state}: {path}{type_annot}{mod_annot}')

    @staticmethod
    def custom_result_summary_renderer(results):
        """Print a note when the command did not yield any result"""
        # no reports, no changes
        if len(results) == 0:
            ui.message("nothing to save, working tree clean")
2 changes: 1 addition & 1 deletion datalad_next/iter_collections/gitdiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def iter_gitdiff(
find_copies: int | None = None,
yield_tree_items: str | None = None,
# TODO add documentation
eval_submodule_state: str = 'commit',
eval_submodule_state: str = 'full',
) -> Generator[GitDiffItem, None, None]:
"""Report differences between Git tree-ishes or tracked worktree content
Expand Down
Loading

0 comments on commit 9cb16ed

Please sign in to comment.