From 89c4ee92e7d538d507e4077fc1789d9cfc6ed228 Mon Sep 17 00:00:00 2001
From: Michael Hanke <michael.hanke@gmail.com>
Date: Thu, 11 Jan 2024 18:01:13 +0100
Subject: [PATCH] Start of a `next-status` command

This implementation is the first to emit `CommandResult` type result
items, i.e., dataclass instances rather than result dicts.

It also uses uniform parameter validation, enabling substantially
simplified implementation (e.g., of the result renderer).

The user-facing appearance remains (largely) the same. The command
documentation contains a summary of the key differences.

Closes #586
---
 datalad_next/__init__.py                      |   4 +
 datalad_next/commands/__init__.py             |   1 +
 datalad_next/commands/status.py               | 375 ++++++++++++++
 datalad_next/commands/tests/test_status.py    |  55 ++
 datalad_next/iter_collections/gitdiff.py      |   2 +-
 datalad_next/iter_collections/gitstatus.py    | 488 +++++++++++++++---
 .../tests/test_itergitstatus.py               | 349 +++++++++++++
 docs/source/api.rst                           |   1 +
 docs/source/cmd.rst                           |   1 +
 9 files changed, 1217 insertions(+), 59 deletions(-)
 create mode 100644 datalad_next/commands/status.py
 create mode 100644 datalad_next/commands/tests/test_status.py
 create mode 100644 datalad_next/iter_collections/tests/test_itergitstatus.py

diff --git a/datalad_next/__init__.py b/datalad_next/__init__.py
index 116df896c..86b879839 100644
--- a/datalad_next/__init__.py
+++ b/datalad_next/__init__.py
@@ -43,6 +43,10 @@
             'datalad_next.commands.ls_file_collection', 'LsFileCollection',
             'ls-file-collection',
         ),
+        (
+            'datalad_next.commands.status', 'Status',
+            'next-status', 'next_status',
+        ),
     ]
 )
 
diff --git a/datalad_next/commands/__init__.py b/datalad_next/commands/__init__.py
index 36de84564..c7cfe99b5 100644
--- a/datalad_next/commands/__init__.py
+++ b/datalad_next/commands/__init__.py
@@ -14,6 +14,7 @@
 
    CommandResult
    CommandResultStatus
+   status.StatusResult
 """
 from __future__ import annotations
 
diff --git a/datalad_next/commands/status.py b/datalad_next/commands/status.py
new file mode 100644
index 000000000..61af15911
--- /dev/null
+++ b/datalad_next/commands/status.py
@@ -0,0 +1,375 @@
+"""
+"""
+from __future__ import annotations
+
+__docformat__ = 'restructuredtext'
+
+from dataclasses import dataclass
+from enum import Enum
+from logging import getLogger
+from pathlib import Path
+from typing import Generator
+
+from datalad_next.commands import (
+    CommandResult,
+    CommandResultStatus,
+    EnsureCommandParameterization,
+    ValidatedInterface,
+    Parameter,
+    ParameterConstraintContext,
+    build_doc,
+    datasetmethod,
+    eval_results,
+)
+from datalad_next.constraints import (
+    EnsureChoice,
+    WithDescription,
+)
+from datalad_next.constraints.dataset import EnsureDataset
+
+from datalad_next.iter_collections.gitdiff import (
+    GitDiffStatus,
+    GitTreeItemType,
+    GitContainerModificationType,
+)
+from datalad_next.iter_collections.gitstatus import (
+    iter_gitstatus,
+)
+from datalad_next.uis import (
+    ui_switcher as ui,
+    ansi_colors as ac,
+)
+
+lgr = getLogger('datalad.core.local.status')
+
+
+# TODO Could be `StrEnum`, came with PY3.11
+class StatusState(Enum):
+    """Enumeration of possible states of a status command result
+
+    The "state" is the condition of the dataset item being reported
+    on.
+    """
+    clean = 'clean'
+    added = 'added'
+    modified = 'modified'
+    deleted = 'deleted'
+    untracked = 'untracked'
+    unknown = 'unknown'
+
+
+STATE_COLOR_MAP = {
+    StatusState.added: ac.GREEN,
+    StatusState.modified: ac.RED,
+    StatusState.deleted: ac.RED,
+    StatusState.untracked: ac.RED,
+    StatusState.unknown: ac.YELLOW,
+}
+
+
+diffstatus2resultstate_map = {
+    GitDiffStatus.addition: StatusState.added,
+    GitDiffStatus.copy: StatusState.added,
+    GitDiffStatus.deletion: StatusState.deleted,
+    GitDiffStatus.modification: StatusState.modified,
+    GitDiffStatus.rename: StatusState.added,
+    GitDiffStatus.typechange: StatusState.modified,
+    GitDiffStatus.unmerged: StatusState.unknown,
+    GitDiffStatus.unknown: StatusState.unknown,
+    GitDiffStatus.other: StatusState.untracked,
+}
+
+
+# see base class decorator comment for why this is commented out
+#@dataclass(kw_only=True)
+@dataclass
+class StatusResult(CommandResult):
+    # TODO the following properties are not actually optional;
+    # we only have to declare them as such due to limitations of
+    # dataclasses prior to PY3.10 (see kw_only comment in base class)
+
+    diff_state: GitDiffStatus | None = None
+    """The ``status`` of the underlying ``GitDiffItem``. It is named
+    "_state" to emphasize the conceptual similarity with the legacy
+    property 'state'
+    """
+    gittype: GitTreeItemType | None = None
+    """The ``gittype`` of the underlying ``GitDiffItem``."""
+    prev_gittype: GitTreeItemType | None = None
+    """The ``prev_gittype`` of the underlying ``GitDiffItem``."""
+    modification_types: tuple[GitContainerModificationType] | None = None
+    """Qualifiers for modification types of container-type
+    items (directories, submodules)."""
+
+    @property
+    def state(self) -> StatusState:
+        """A (more or less legacy) simplified representation of the subject
+        state. For a more accurate classification use the ``diff_state``
+        property.
+        """
+        return diffstatus2resultstate_map[self.diff_state]
+
+    # the previous status-implementation did not report plain git-types
+    # we establish a getter to perform this kind of inference/mangling,
+    # when it is needed
+    @property
+    def type(self) -> str | None:
+        """
+        """
+        # TODO this is just a placeholder
+        return self.gittype.value if self.gittype else None
+
+    # we need a setter for this `type`-override stunt
+    @type.setter
+    def type(self, value):
+        self.gittype = value
+
+    @property
+    def prev_type(self) -> str:
+        """
+        """
+        return self.prev_gittype.value if self.prev_gittype else None
+
+    @property
+    def type_src(self) -> str | None:
+        """Backward-compatibility adaptor"""
+        return self.prev_type
+
+
+opt_untracked_values = ('no', 'whole-dir', 'no-empty-dir', 'normal', 'all')
+opt_recursive_values = ('no', 'repository', 'datasets', 'mono')
+opt_eval_subdataset_state_values = ('no', 'commit', 'full')
+
+
+class StatusParamValidator(EnsureCommandParameterization):
+    def __init__(self):
+        super().__init__(
+            param_constraints=dict(
+                # if given, it must also exist
+                dataset=EnsureDataset(installed=True),
+                untracked=EnsureChoice(*opt_untracked_values),
+                recursive=EnsureChoice(*opt_recursive_values),
+                eval_subdataset_state=EnsureChoice(
+                    *opt_eval_subdataset_state_values)
+            ),
+            validate_defaults=('dataset',),
+            joint_constraints={
+                ParameterConstraintContext(('untracked', 'recursive'),
+                                           'option normalization'):
+                self.normalize_options,
+            },
+        )
+
+    def normalize_options(self, **kwargs):
+        if kwargs['untracked'] == 'no':
+            kwargs['untracked'] = None
+        if kwargs['untracked'] == 'normal':
+            kwargs['untracked'] = 'no-empty-dir'
+        if kwargs['recursive'] == 'datasets':
+            kwargs['recursive'] = 'submodules'
+        if kwargs['recursive'] == 'mono':
+            kwargs['recursive'] = 'monolithic'
+        return kwargs
+
+
+@build_doc
+class Status(ValidatedInterface):
+    """Report on the (modification) status of a dataset
+
+    .. note::
+
+        This is a preview of a command implementation aiming to replace
+        the DataLad ``status`` command.
+
+        For now, expect anything here to change again.
+
+    This command provides a report that is roughly identical to that of
+    ``git status``. Running with default parameters yields a report that
+    should look familiar to Git and DataLad users alike, and contain
+    the same information as offered by ``git status``.
+
+    The main differences to ``git status`` are:
+
+    - Support for recursion into submodules. ``git status`` does that too,
+      but the report is limited to the global state of an entire submodule,
+      whereas this command can issue detailed reports on changes inside
+      a submodule (any nesting depth).
+
+    - Support for directory-constrained reporting. Much like ``git status``
+      limits its report to a single repository, this command can optionally
+      limit its report to a single directory and its direct children. In this
+      report subdirectories are considered containers (much like submodules),
+      and a change summary is provided for them.
+
+    - Support for a "mono" (monolithic repository) report. Unlike a standard
+      recursion into submodules, and checking each of them for changes with
+      respect to the HEAD commit of the worktree, this report compares a
+      submodule with respect to the state recorded in its parent repository.
+      This provides an equally comprehensive status report from the point of
+      view of a queried repository, but does not include a dedicated item on
+      the global state of a submodule. This makes a nested hierarchy of
+      repositories appear like a single (mono) repository.
+
+    - Support for "adjusted mode" git-annex repositories. These utilize a
+      managed branch that is repeatedly rewritten, hence is not suitable
+      for tracking within a parent repository. Instead, the underlying
+      "corresponding branch" is used, which contains the equivalent content
+      in an un-adjusted form, persistently. This command detects this condition
+      and automatically checks a repository's state against the corresponding
+      branch state.
+
+    *Presently missing/planned features*
+
+    - There is no support for specifying paths (or pathspecs) for constraining
+      the operation to specific dataset parts. This will be added in the
+      future.
+
+    - There is no reporting of git-annex properties, such as tracked file size.
+      It is undetermined whether this will be added in the future. However,
+      even without a dedicated switch, this command has support for
+      datasets (and their submodules) in git-annex's "adjusted mode".
+
+    *Differences to the ``status`` command implementation prior DataLad v2*
+
+    - Like ``git status`` this implementation reports on dataset modification,
+      whereas the previous ``status`` also provided a listing of unchanged
+      dataset content. This is no longer done. Equivalent functionality for
+      listing dataset content is provided by the ``ls_file_collection``
+      command.
+    - The implementation is substantially faster. Depending on the context
+      the speed-up is typically somewhere between 2x and 100x.
+    - The implementation does not suffer from the limitation re type change
+      detection.
+    - Python and CLI API of the command use uniform parameter validation.
+    """
+    # Interface.validate_args() will inspect this dict for the presence of a
+    # validator for particular parameters
+    _validator_ = StatusParamValidator()
+
+    # this is largely here for documentation and CLI parser building
+    _params_ = dict(
+        dataset=Parameter(
+            args=("-d", "--dataset"),
+            doc="""Dataset to report the status of. If no dataset is
+            given, the report is rooted at the current working directory
+            instead of the dataset root."""),
+        untracked=Parameter(
+            args=('--untracked',),
+            choices=opt_untracked_values,
+            doc="""Determine how untracked content is considered and reported
+            when comparing a revision to the state of the working tree.
+            'no': no untracked content is considered as a change;
+            'normal': untracked files and entire untracked directories are
+            reported as such;
+            'all': report individual files even in fully untracked directories.
+            In addition to these git-status modes,
+            'whole-dir' (like normal, but include empty directories), and
+            'no-empty-dir' (alias for 'normal') are understood."""),
+        recursive=Parameter(
+            args=('-r', '--recursive'),
+            nargs='?',
+            const='datasets',
+            choices=opt_recursive_values,
+            doc="""Mode of recursion for status reporting.
+            With 'no' the report is restricted to a single directory and
+            its direct children.
+            With 'repository', the report comprises all repository content
+            underneath current working directory or root of a given dataset,
+            but is limited to items directly contained in that repository.
+            With 'datasets', the report also comprises any content in any
+            subdatasets. Each subdataset is evaluated against its respective
+            HEAD commit.
+            With 'mono', a report similar to 'datasets' is generated, but
+            any subdataset is evaluated with respect to the state recorded
+            in its parent repository. In contrast to the 'datasets' mode,
+            no report items on a joint submodule are generated.
+            [CMD: If no particular value is given with this option the
+            'datasets' mode is selected. CMD]
+            """),
+        eval_subdataset_state=Parameter(
+            args=("-e", "--eval-subdataset-state",),
+            choices=opt_eval_subdataset_state_values,
+            doc="""Evaluation of subdataset state (modified or untracked
+            content) can be expensive for deep dataset hierarchies
+            as subdatasets have to be tested recursively for
+            uncommitted modifications. Setting this option to
+            'no' or 'commit' can substantially boost performance
+            by limiting what is being tested.
+            With 'no' no state is evaluated and subdatasets are not
+            investigated for modifications.
+            With 'commit' only a discrepancy of the HEAD commit
+            gitsha of a subdataset and the gitsha recorded in the
+            superdataset's record is evaluated.
+            With 'full' any other modifications are considered
+            too."""),
+    )
+
+    _examples_ = [
+    ]
+
+    @staticmethod
+    @datasetmethod(name="next_status")
+    @eval_results
+    def __call__(
+        # TODO later
+        #path=None,
+        *,
+        dataset=None,
+        # TODO possibly later
+        #annex=None,
+        untracked='normal',
+        recursive='repository',
+        eval_subdataset_state='full',
+    ) -> Generator[StatusResult, None, None] | list[StatusResult]:
+        ds = dataset.ds
+        rootpath = Path.cwd() if dataset.original is None else ds.pathobj
+
+        for item in iter_gitstatus(
+            path=rootpath,
+            untracked=untracked,
+            recursive=recursive,
+            eval_submodule_state=eval_subdataset_state,
+        ):
+            yield StatusResult(
+                action='status',
+                status=CommandResultStatus.ok,
+                path=rootpath / (item.path or item.prev_path),
+                gittype=item.gittype,
+                prev_gittype=item.prev_gittype,
+                diff_state=item.status,
+                modification_types=item.modification_types,
+                refds=ds,
+                logger=lgr,
+            )
+
+    def custom_result_renderer(res, **kwargs):
+        # we are guaranteed to have dataset-arg info through uniform
+        # parameter validation
+        dsarg = kwargs['dataset']
+        rootpath = Path.cwd() if dsarg.original is None else dsarg.ds.pathobj
+        # because we can always determine the root path of the command
+        # execution environment, we can report meaningful relative paths
+        # unconditionally
+        path = res.path.relative_to(rootpath)
+        # collapse item type information across current and previous states
+        type_ = res.type or res.prev_type or ''
+        max_len = len('untracked')
+        state = res.state.value
+        # message format is same as for previous command implementation
+        ui.message(u'{fill}{state}: {path}{type_}{annot}'.format(
+            fill=' ' * max(0, max_len - len(state)),
+            state=ac.color_word(
+                res.state.value,
+                STATE_COLOR_MAP.get(res.state)),
+            path=path,
+            type_=' ({})'.format(ac.color_word(type_, ac.MAGENTA))
+            if type_ else '',
+            annot=f' [{", ".join(q.value for q in res.modification_types)}]'
+            if res.modification_types else '',
+        ))
+
+    @staticmethod
+    def custom_result_summary_renderer(results):
+        # no reports, no changes
+        if len(results) == 0:
+            ui.message("nothing to save, working tree clean")
diff --git a/datalad_next/commands/tests/test_status.py b/datalad_next/commands/tests/test_status.py
new file mode 100644
index 000000000..55b91d244
--- /dev/null
+++ b/datalad_next/commands/tests/test_status.py
@@ -0,0 +1,55 @@
+import pytest
+
+from datalad.api import next_status
+
+from datalad_next.constraints.exceptions import (
+    CommandParametrizationError,
+    ParameterConstraintContext,
+)
+from datalad_next.tests.utils import chpwd
+
+from ..status import (
+    opt_eval_subdataset_state_values,
+    opt_recursive_values,
+    opt_untracked_values,
+)
+
+
+def test_status_invalid(tmp_path, datalad_cfg):
+    # we want exhaustive parameter validation (i.e., continue after
+    # first failure), saves some code here
+    datalad_cfg.set('datalad.runtime.parameter-violation',
+                    'raise-at-end',
+                    scope='global')
+    with chpwd(tmp_path):
+        with pytest.raises(CommandParametrizationError) as e:
+            next_status(
+                untracked='weird',
+                recursive='upsidedown',
+                eval_subdataset_state='moonphase',
+            )
+        errors = e.value.errors
+        assert 'no dataset found' in \
+            errors[ParameterConstraintContext(('dataset',))].msg.casefold()
+        for opt in ('untracked', 'recursive', 'eval_subdataset_state'):
+            assert 'is not one of' in \
+                errors[ParameterConstraintContext((opt,))].msg.casefold()
+
+
+def test_status_renderer_smoke(existing_dataset):
+    ds = existing_dataset
+    assert ds.next_status() == []
+    (ds.pathobj / 'untracked').touch()
+    st = ds.next_status()
+    assert len(st) == 1
+
+
+def test_status_clean(existing_dataset, no_result_rendering):
+    ds = existing_dataset
+    ds.create('subds')
+    for recmode in opt_recursive_values:
+        assert [] == ds.next_status(recursive=recmode)
+    for untracked in opt_untracked_values:
+        assert [] == ds.next_status(untracked=untracked)
+    for eval_sm in opt_eval_subdataset_state_values:
+        assert [] == ds.next_status(eval_subdataset_state=eval_sm)
diff --git a/datalad_next/iter_collections/gitdiff.py b/datalad_next/iter_collections/gitdiff.py
index 7bf71744b..346c53b3c 100644
--- a/datalad_next/iter_collections/gitdiff.py
+++ b/datalad_next/iter_collections/gitdiff.py
@@ -113,7 +113,7 @@ def iter_gitdiff(
     find_copies: int | None = None,
     yield_tree_items: str | None = None,
     # TODO add documentation
-    eval_submodule_state: str = 'commit',
+    eval_submodule_state: str = 'full',
 ) -> Generator[GitDiffItem, None, None]:
     """Report differences between Git tree-ishes or tracked worktree content
 
diff --git a/datalad_next/iter_collections/gitstatus.py b/datalad_next/iter_collections/gitstatus.py
index 66d6f78f6..e75627888 100644
--- a/datalad_next/iter_collections/gitstatus.py
+++ b/datalad_next/iter_collections/gitstatus.py
@@ -7,17 +7,32 @@
 import logging
 from pathlib import (
     Path,
-    PurePosixPath,
+    PurePath,
 )
 from typing import Generator
 
+from datalad_next.runners import (
+    CommandError,
+    iter_git_subproc,
+)
+from datalad_next.itertools import (
+    decode_bytes,
+    itemize,
+)
+from datalad_next.repo_utils import (
+    get_worktree_head,
+    iter_submodules,
+)
+
 from .gitdiff import (
     GitDiffItem,
     GitDiffStatus,
-    GitTreeItemType,
+    GitContainerModificationType,
     iter_gitdiff,
 )
 from .gitworktree import (
+    GitTreeItem,
+    GitTreeItemType,
     iter_gitworktree,
     lsfiles_untracked_args,
     _git_ls_files,
@@ -31,9 +46,17 @@ def iter_gitstatus(
     *,
     untracked: str | None = 'all',
     recursive: str = 'repository',
-    yield_tree_items: str | None = None,
+    eval_submodule_state: str = "full",
 ) -> Generator[GitDiffItem, None, None]:
     """
+    Recursion mode 'no'
+
+    This mode limits the reporting to immediate directory items of a given
+    path. This mode is not necessarily faster than a 'repository' recursion.
+    Its primary purpose is the ability to deliver a collapsed report in that
+    subdirectories are treated similar to submodules -- as containers that
+    may have modified or untracked content.
+
     Parameters
     ----------
     path: Path
@@ -47,20 +70,23 @@ def iter_gitstatus(
       ``all`` reports on any untracked file; ``whole-dir`` yields a single
       report for a directory that is entirely untracked, and not individual
       untracked files in it; ``no-empty-dir`` skips any reports on
-      untracked empty directories.
-    recursive: {'repository', 'submodules', 'no'}, optional
+      untracked empty directories. Also see ``eval_submodule_state`` for
+      how this parameter is applied in submodule recursion.
+    recursive: {'no', 'repository', 'submodules', 'monolithic'}, optional
       Behavior for recursion into subtrees. By default (``repository``),
       all trees within the repository underneath ``path``) are reported,
       but no tree within submodules. With ``submodules``, recursion includes
       any submodule that is present. If ``no``, only direct children
       are reported on.
-    yield_tree_items: {'submodules', 'directories', 'all', None}, optional
-      Whether to yield an item on type of subtree that will also be recursed
-      into. For example, a submodule item, when submodule recursion is
-      enabled. When disabled, subtree items (directories, submodules)
-      will still be reported whenever there is no recursion into them.
-      For example, submodule items are reported when
-      ``recursive='repository``, even when ``yield_tree_items=None``.
+    eval_submodule_state: {"no", "commit", "full"}, optional
+      If 'full' (default), the state of a submodule is evaluated by
+      considering all modifications, with the treatment of untracked files
+      determined by `untracked`. If 'commit', the modification check is
+      restricted to comparing the submodule's "HEAD" commit to the one
+      recorded in the superdataset. If 'no', the state of the subdataset is
+      not evaluated. When a git-annex repository in adjusted mode is detected,
+      the reference commit that the worktree is being compared to is the basis
+      of the adjusted branch (i.e., the corresponding branch).
 
     Yields
     ------
@@ -71,72 +97,274 @@ def iter_gitstatus(
     """
     path = Path(path)
 
-    if untracked is None:
-        # we can delegate all of this
-        yield from iter_gitdiff(
-            path,
-            from_treeish='HEAD',
-            # to the worktree
-            to_treeish=None,
-            recursive=recursive,
-            yield_tree_items=yield_tree_items,
-        )
+    head, corresponding_head = get_worktree_head(path)
+    # TODO it would make sense to always (or optionally) compare against any
+    # existing corresponding_head. This would make the status communicate
+    # anything that has not made it into the corresponding branch yet
+
+    common_args = dict(
+        head=head,
+        path=path,
+        untracked=untracked,
+        eval_submodule_state=eval_submodule_state,
+    )
+
+    if recursive == 'no':
+        yield from _yield_dir_items(**common_args)
         return
+    elif recursive == 'repository':
+        yield from _yield_repo_items(**common_args)
+    # TODO what we really want is a status that is not against a per-repository
+    # HEAD, but against the commit that is recorded in the parent repository
+    # TODO we need a name for that
+    elif recursive in ('submodules', 'monolithic'):
+        yield from _yield_hierarchy_items(
+            recursion_mode=recursive,
+            **common_args,
+        )
+    else:
+        raise ValueError(f'unknown recursion type {recursive!r}')
+
 
-    # limit to within-repo, at most
-    recmode = 'repository' if recursive == 'submodules' else recursive
+#
+# status generators for each mode
+#
 
-    # we always start with a repository-contrained diff against the worktree
-    # tracked content
-    for item in iter_gitdiff(
+def _yield_dir_items(
+    *,
+    head: str | None,
+    path: Path,
+    untracked: str | None,
+    eval_submodule_state: str,
+):
+    # potential container items in a directory that need content
+    # investigation
+    container_types = (
+        GitTreeItemType.directory,
+        GitTreeItemType.submodule,
+    )
+    if untracked == 'no':
+        # no need to look at anything other than the diff report
+        dir_items = {}
+    else:
+        # there is no recursion, avoid wasting cycles on listing individual
+        # files in subdirectories
+        untracked = 'whole-dir' if untracked == 'all' else untracked
+        # gather all directory items upfront, we subtract the ones reported
+        # modified later and lastly yield all untracked content from them
+        dir_items = {
+            str(item.name): item
+            for item in iter_gitworktree(
+                path,
+                untracked=untracked,
+                recursive='no',
+            )
+        }
+    # diff constrained to direct children
+    for item in ([] if head is None else iter_gitdiff(
         path,
         from_treeish='HEAD',
         # to the worktree
         to_treeish=None,
-        recursive=recmode,
-        yield_tree_items=yield_tree_items,
-    ):
-        # TODO when recursive==submodules, do not yield present
-        # items of present submodules unless yield_tree_items says so
-        yield item
-
-    # now untracked files of this repo
-    assert untracked is not None
-    yield from _yield_repo_untracked(path, untracked)
+        recursive='no',
+        # TODO trim scope like in repo_items
+        eval_submodule_state=eval_submodule_state,
+    )):
+        if item.status != GitDiffStatus.deletion \
+                and item.gittype in container_types:
+            if item.gittype == GitTreeItemType.submodule:
+                # issue standard submodule container report
+                _eval_submodule(path, item, eval_submodule_state)
+            else:
+                dir_path = path / item.path
+                # this is on a directory. if it appears here, it has
+                # modified content
+                if dir_path.exists():
+                    item.add_modification_type(
+                        GitContainerModificationType.modified_content)
+                    if untracked != 'no' \
+                            and _path_has_untracked(path / item.path):
+                        item.add_modification_type(
+                            GitContainerModificationType.untracked_content)
+                else:
+                    # this directory is gone entirely
+                    item.status = GitDiffStatus.deletion
+                    item.modification_types = None
+            # we dealt with this item completely
+            dir_items.pop(item.name, None)
+        if item.status:
+            yield item
 
-    if recursive != 'submodules':
-        # all other modes of recursion have been dealt with
+    if untracked == 'no':
         return
 
-    # at this point, we know we need to recurse into submodule, and we still
-    # have to report on untracked files -> scan the worktree
-    for item in iter_gitworktree(
+    # yield anything untracked, and inspect remaining containers
+    for dir_item in dir_items.values():
+        if dir_item.gitsha is None and dir_item.gittype is None:
+            # this is untracked
+            yield GitDiffItem(
+                # for homogeneity we report a str-path no matter what
+                name=str(dir_item.name),
+                status=GitDiffStatus.other,
+            )
+        elif dir_item.gittype in container_types:
+            # none of these containers has any modification other than
+            # possibly untracked content
+            item = GitDiffItem(
+                # for homogeneity we report a str-path no matter what
+                name=str(dir_item.name),
+                # this submodule has not been detected as modified
+                # per-commit, assign reported gitsha to pre and post
+                # state
+                gitsha=dir_item.gitsha,
+                prev_gitsha=dir_item.gitsha,
+                gittype=dir_item.gittype,
+                # TODO others?
+            )
+            if item.gittype == GitTreeItemType.submodule:
+                # issue standard submodule container report
+                _eval_submodule(path, item, eval_submodule_state)
+            else:
+                # this is on a directory. if it appears here, it has
+                # no modified content
+                if _path_has_untracked(path / dir_item.path):
+                    item.status = GitDiffStatus.modification
+                    item.add_modification_type(
+                        GitContainerModificationType.untracked_content)
+            if item.status:
+                yield item
+
+
+def _yield_repo_items(
+    *,
+    head: str | None,
+    path: Path,
+    untracked: str | None,
+    eval_submodule_state: str,
+) -> Generator[GitDiffItem, None, None]:
+    """Report status items for a single/whole repository"""
+    present_submodules = {
+        # stringify name for speedy comparison
+        # TODO double-check that comparisons are primarily with
+        # GitDiffItem.name which is str
+        str(item.name): item for item in iter_submodules(path)
+    }
+    # start with a repository-constrained diff against the worktree
+    for item in ([] if head is None else iter_gitdiff(
         path,
-        untracked=None,
-        link_target=False,
-        fp=False,
-        # singledir mode has been ruled out above,
-        # we need to find all submodules
+        from_treeish='HEAD',
+        # to the worktree
+        to_treeish=None,
         recursive='repository',
+        # we should be able to go cheaper with the submodule evaluation here.
+        # We need to redo some check for adjusted mode, and other cases anyways
+        eval_submodule_state='commit'
+        if eval_submodule_state == 'full' else eval_submodule_state,
+    )):
+        # immediately investigate any submodules that are already
+        # reported modified by Git
+        if item.gittype == GitTreeItemType.submodule:
+            _eval_submodule(path, item, eval_submodule_state)
+            # we dealt with this submodule
+            present_submodules.pop(item.name, None)
+        if item.status:
+            yield item
+
+    # we are not generating a recursive report for submodules, hence
+    # we need to look at ALL submodules for untracked content
+    # `or {}` for the case where we got no submodules, which happens
+    # with `eval_submodule_state == 'no'`
+    for subm_name, subm_item in (present_submodules or {}).items():
+        # none of these submodules has any modification other than
+        # possibly untracked content
+        item = GitDiffItem(
+            # for homogeneity we report a str-path no matter what
+            name=str(subm_item.name),
+            # this submodule has not been detected as modified
+            # per-commit, assign reported gitsha to pre and post
+            # state
+            gitsha=subm_item.gitsha,
+            prev_gitsha=subm_item.gitsha,
+            gittype=subm_item.gittype,
+            # TODO others?
+        )
+        # TODO possibly trim eval_submodule_state
+        _eval_submodule(path, item, eval_submodule_state)
+        if item.status:
+            yield item
+
+    if untracked == 'no':
+        return
+
+    # lastly untracked files of this repo
+    yield from _yield_repo_untracked(path, untracked)
+
+
+def _yield_hierarchy_items(
+    *,
+    head: str | None,
+    path: Path,
+    untracked: str | None,
+    recursion_mode: str,
+    eval_submodule_state: str,
+) -> Generator[GitDiffItem, None, None]:
+    for item in _yield_repo_items(
+        head=head,
+        path=path,
+        untracked=untracked,
+        # TODO do we need to adjust the eval mode here for the diff recmodes?
+        eval_submodule_state=eval_submodule_state,
     ):
-        if item.gittype != GitTreeItemType.submodule \
-                or item.name == PurePosixPath('.'):
-            # either this is no submodule, or a submodule that was found at
-            # the root path -- which would indicate that the submodule
-            # itself it not around, only its record in the parent
+        # we get to see any submodule item passing through here, and can simply
+        # call this function again for a subpath
+        if item.gittype != GitTreeItemType.submodule:
+            yield item
             continue
-        for i in iter_gitstatus(
-            # the .path of a GitTreeItem is always POSIX
-            path=path / item.path,
+
+        # submodule recursion
+        # the .path of a GitTreeItem is always POSIX
+        sm_path = path / item.path
+        if recursion_mode == 'submodules':
+            # in this mode, we run the submodule status against its own
+            # worktree head
+            sm_head, _ = get_worktree_head(sm_path)
+            # because this need not cover all possible changes with respect
+            # to the parent repository, we yield an item on the submodule
+            # itself
+            yield item
+        elif recursion_mode == 'monolithic':
+            # in this mode we determine the change of the submodule with
+            # respect to the recorded state in the parent. This is either
+            # the current gitsha, or (if git detected a committed
+            # modification) the previous sha. This way, any further report
+            # on changes is comprehensive from the point of view of the parent
+            # repository, hence no submodule item is emitted
+            sm_head = item.gitsha or item.prev_gitsha
+
+        for i in _yield_hierarchy_items(
+            head=sm_head,
+            path=sm_path,
             untracked=untracked,
-            recursive='submodules',
-            yield_tree_items=yield_tree_items,
+            # TODO here we could implement handling for a recursion-depth limit
+            recursion_mode=recursion_mode,
+            eval_submodule_state=eval_submodule_state,
         ):
             i.name = f'{item.name}/{i.name}'
             yield i
 
 
-def _yield_repo_untracked(path, untracked):
+#
+# Helpers
+#
+
+
+def _yield_repo_untracked(
+        path: Path,
+        untracked: str,
+) -> Generator[GitDiffItem, None, None]:
+    """Yield items on all untracked content in a repository"""
+    if untracked is None:
+        return
     for uf in _git_ls_files(
         path,
         *lsfiles_untracked_args[untracked],
@@ -145,3 +373,147 @@ def _yield_repo_untracked(path, untracked):
             name=uf,
             status=GitDiffStatus.other,
         )
+
+
+def _path_has_untracked(path: Path) -> bool:
+    """Recursively check for any untracked content (except empty dirs)"""
+    if not path.exists():
+        # cannot possibly have untracked
+        return False
+    for ut in _yield_repo_untracked(
+        path,
+        'no-empty-dir',
+    ):
+        # fast exit on the first detection
+        return True
+    # we need to find all submodules, regardless of mode.
+    # untracked content can also be in a submodule underneath
+    # a directory
+    for subm in iter_submodules(path):
+        if _path_has_untracked(path / subm.path):
+            # fast exit on the first detection
+            return True
+    # only after we saw everything we can say there is nothing
+    return False
+
+
+def _get_submod_worktree_head(path: Path) -> tuple[bool, str | None, bool]:
+    """Returns (submodule exists, SHA | None, adjusted)"""
+    try:
+        HEAD, corresponding_head = get_worktree_head(path)
+    except ValueError:
+        return False, None, False
+
+    adjusted = corresponding_head is not None
+    if adjusted:
+        # this is a git-annex adjusted branch. do the comparison against
+        # its basis. it is not meaningful to track the managed branch in
+        # a superdataset
+        HEAD = corresponding_head
+    with iter_git_subproc(
+        ['rev-parse', '--path-format=relative',
+         '--show-toplevel', HEAD],
+        cwd=path,
+    ) as r:
+        res = tuple(decode_bytes(itemize(r, sep=None, keep_ends=False)))
+        assert len(res) == 2
+        if res[0].startswith('..'):
+            # this is not a report on a submodule at this location
+            return False, None, adjusted
+        else:
+            return True, res[1], adjusted
+
+
+def _eval_submodule(basepath, item, eval_mode) -> None:
+    """In-place amend GitDiffItem submodule item
+
+    It does nothing with ``eval_mode='no'``.
+    """
+    if eval_mode == 'no':
+        return
+
+    item_path = basepath / item.path
+    # get head commit, and whether a submodule is actually present,
+    # and/or in adjusted mode
+    subds_present, head_commit, adjusted = _get_submod_worktree_head(item_path)
+    if not subds_present:
+        return
+
+    if adjusted:
+        _eval_submodule_adjusted(item_path, item, head_commit, eval_mode)
+    else:
+        _eval_submodule_normal(item_path, item, head_commit, eval_mode)
+
+
+def _eval_submodule_normal(item_path, item, head_commit, eval_mode) -> None:
+    if eval_mode == 'full' and item.status is None or (
+        item.modification_types
+        and GitContainerModificationType.new_commits in item.modification_types
+    ):
+        # if new commits have been detected, the diff-implementation is
+        # not able to report "modified content" at the same time, if it
+        # exists. This requires a dedicated inspection, which coincidentally
+        # is identical to the analysis of an adjusted mode submodule.
+        return _eval_submodule_adjusted(
+            item_path, item, head_commit, eval_mode)
+
+    if item.gitsha != head_commit:
+        item.status = GitDiffStatus.modification
+        item.add_modification_type(GitContainerModificationType.new_commits)
+
+    if eval_mode == 'commit':
+        return
+
+    # check for untracked content (recursively)
+    if _path_has_untracked(item_path):
+        item.status = GitDiffStatus.modification
+        item.add_modification_type(
+            GitContainerModificationType.untracked_content)
+
+
+def _eval_submodule_adjusted(item_path, item, head_commit, eval_mode) -> None:
+    # we cannot rely on the diff-report for a submodule in adjusted mode.
+    # git would make the comparison to the adjusted branch HEAD alone.
+    # this would almost always be invalid, because it is not meaningful to
+    # track a commit in an adjusted branch (it goes away).
+    #
+    # instead, we need to:
+    # - check for a change in the corresponding HEAD to the recorded commit
+    #   in the parent repository, consider any change "new commits"
+    # - check for a diff of the worktree to corresponding HEAD, consider
+    #   any such diff a "modified content"
+    # - and lastly check for untracked content
+
+    # start with "no modification"
+    item.status = None
+    item.modification_types = None
+
+    if item.prev_gitsha != head_commit:
+        item.status = GitDiffStatus.modification
+        item.add_modification_type(GitContainerModificationType.new_commits)
+
+    if eval_mode == 'commit':
+        return
+
+    if any(
+        i.status is not None
+        for i in iter_gitdiff(
+            item_path,
+            from_treeish=head_commit,
+            # worktree
+            to_treeish=None,
+            recursive='repository',
+            find_renames=None,
+            find_copies=None,
+            eval_submodule_state='commit',
+        )
+    ):
+        item.status = GitDiffStatus.modification
+        item.add_modification_type(
+            GitContainerModificationType.modified_content)
+
+    # check for untracked content (recursively)
+    if _path_has_untracked(item_path):
+        item.status = GitDiffStatus.modification
+        item.add_modification_type(
+            GitContainerModificationType.untracked_content)
diff --git a/datalad_next/iter_collections/tests/test_itergitstatus.py b/datalad_next/iter_collections/tests/test_itergitstatus.py
new file mode 100644
index 000000000..9e32f2863
--- /dev/null
+++ b/datalad_next/iter_collections/tests/test_itergitstatus.py
@@ -0,0 +1,349 @@
+from itertools import chain
+import pytest
+
+from datalad_next.datasets import Dataset
+from datalad_next.runners import call_git_success
+from datalad_next.tests.utils import rmtree
+
+from ..gitstatus import (
+    GitDiffStatus,
+    GitContainerModificationType,
+    iter_gitstatus,
+)
+
+
+# we make this module-scope, because we use the same complex test case for all
+# tests here and we trust that nothing in here changes that test case
+@pytest.fixture(scope="module")
+def status_playground(tmp_path_factory):
+    """Produces a dataset with various modifications
+
+    ``git status`` will report::
+
+        ❯ git status -uall
+        On branch dl-test-branch
+        Changes to be committed:
+          (use "git restore --staged <file>..." to unstage)
+                new file:   dir_m/file_a
+                new file:   file_a
+
+        Changes not staged for commit:
+          (use "git add/rm <file>..." to update what will be committed)
+          (use "git restore <file>..." to discard changes in working directory)
+          (commit or discard the untracked or modified content in submodules)
+                deleted:    dir_d/file_d
+                deleted:    dir_m/file_d
+                modified:   dir_m/file_m
+                deleted:    dir_sm/sm_d
+                modified:   dir_sm/sm_m (modified content)
+                modified:   dir_sm/sm_mu (modified content, untracked content)
+                modified:   dir_sm/sm_n (new commits)
+                modified:   dir_sm/sm_nm (new commits, modified content)
+                modified:   dir_sm/sm_nmu (new commits, modified content, untracked content)
+                modified:   dir_sm/sm_u (untracked content)
+                deleted:    file_d
+                modified:   file_m
+
+        Untracked files:
+          (use "git add <file>..." to include in what will be committed)
+                dir_m/dir_u/file_u
+                dir_m/file_u
+                dir_u/file_u
+                file_u
+
+    Suffix indicates the ought-to state (multiple possible):
+
+    a - added
+    c - clean
+    d - deleted
+    n - new commits
+    m - modified
+    u - untracked content
+
+    Prefix indicates the item type:
+
+    file - file
+    sm - submodule
+    dir - directory
+    """
+    ds = Dataset(tmp_path_factory.mktemp("status_playground"))
+    ds.create(result_renderer='disabled')
+    ds_dir = ds.pathobj / 'dir_m'
+    ds_dir.mkdir()
+    ds_dir_d = ds.pathobj / 'dir_d'
+    ds_dir_d.mkdir()
+    (ds_dir / 'file_m').touch()
+    (ds.pathobj / 'file_m').touch()
+    dirsm = ds.pathobj / 'dir_sm'
+    dss = {}
+    for smname in (
+        'sm_d', 'sm_c', 'sm_n', 'sm_m', 'sm_nm', 'sm_u', 'sm_mu', 'sm_nmu',
+        'droppedsm_c',
+    ):
+        sds = Dataset(dirsm / smname).create(result_renderer='disabled')
+        # for the plain modification, commit the reference right here
+        if smname in ('sm_m', 'sm_nm', 'sm_mu', 'sm_nmu'):
+            (sds.pathobj / 'file_m').touch()
+        sds.save(to_git=True, result_renderer='disabled')
+        dss[smname] = sds
+    # files in superdataset to be deleted
+    for d in (ds_dir_d, ds_dir, ds.pathobj):
+        (d / 'file_d').touch()
+    dss['.'] = ds
+    dss['dir'] = ds_dir
+    ds.save(to_git=True, result_renderer='disabled')
+    ds.drop(dirsm / 'droppedsm_c', what='datasets', reckless='availability',
+            result_renderer='disabled')
+    # a new commit
+    for smname in ('.', 'sm_n', 'sm_nm', 'sm_nmu'):
+        sds = dss[smname]
+        (sds.pathobj / 'file_c').touch()
+        sds.save(to_git=True, result_renderer='disabled')
+    # modified file
+    for smname in ('.', 'dir', 'sm_m', 'sm_nm', 'sm_mu', 'sm_nmu'):
+        obj = dss[smname]
+        pobj = obj.pathobj if isinstance(obj, Dataset) else obj
+        (pobj / 'file_m').write_text('modify!')
+    # untracked
+    for smname in ('.', 'dir', 'sm_u', 'sm_mu', 'sm_nmu'):
+        obj = dss[smname]
+        pobj = obj.pathobj if isinstance(obj, Dataset) else obj
+        (pobj / 'file_u').touch()
+        (pobj / 'dirempty_u').mkdir()
+        (pobj / 'dir_u').mkdir()
+        (pobj / 'dir_u' / 'file_u').touch()
+    # delete items
+    rmtree(dss['sm_d'].pathobj)
+    rmtree(ds_dir_d)
+    (ds_dir / 'file_d').unlink()
+    (ds.pathobj / 'file_d').unlink()
+    # added items
+    for smname in ('.', 'dir', 'sm_m', 'sm_nm', 'sm_mu', 'sm_nmu'):
+        obj = dss[smname]
+        pobj = obj.pathobj if isinstance(obj, Dataset) else obj
+        (pobj / 'file_a').write_text('added')
+        assert call_git_success(['add', 'file_a'], cwd=pobj)
+
+    yield ds
+
+
+def test_status_homogeneity(status_playground):
+    """Test things that should always be true, no matter the precise
+    parameterization
+
+    A main purpose of this test is also to exercise all (main) code paths.
+    """
+    ds = status_playground
+    for kwargs in (
+        # default
+        dict(path=ds.pathobj),
+        dict(path=ds.pathobj, recursive='no'),
+        dict(path=ds.pathobj, recursive='repository'),
+        dict(path=ds.pathobj, recursive='submodules'),
+        # same as above, but with the submodules in the root
+        dict(path=ds.pathobj / 'dir_sm', recursive='no'),
+        dict(path=ds.pathobj / 'dir_sm', recursive='repository'),
+        dict(path=ds.pathobj / 'dir_sm', recursive='submodules'),
+        # no submodule state
+        dict(path=ds.pathobj, eval_submodule_state='no', recursive='no'),
+        dict(path=ds.pathobj, eval_submodule_state='no', recursive='repository'),
+        dict(path=ds.pathobj, eval_submodule_state='no', recursive='submodules'),
+        # just the commit
+        dict(path=ds.pathobj, eval_submodule_state='commit', recursive='no'),
+        dict(path=ds.pathobj, eval_submodule_state='commit', recursive='repository'),
+        dict(path=ds.pathobj, eval_submodule_state='commit', recursive='submodules'),
+        # without untracked
+        dict(path=ds.pathobj, untracked='no', recursive='no'),
+        dict(path=ds.pathobj, untracked='no', recursive='repository'),
+        dict(path=ds.pathobj, untracked='no', recursive='submodules'),
+        # special untracked modes
+        dict(path=ds.pathobj, untracked='whole-dir', recursive='no'),
+        dict(path=ds.pathobj, untracked='whole-dir', recursive='repository'),
+        dict(path=ds.pathobj, untracked='whole-dir', recursive='submodules'),
+        dict(path=ds.pathobj, untracked='no-empty-dir', recursive='no'),
+        dict(path=ds.pathobj, untracked='no-empty-dir', recursive='repository'),
+        dict(path=ds.pathobj, untracked='no-empty-dir', recursive='submodules'),
+        # call in the mountpoint of a dropped submodule
+        dict(path=ds.pathobj / 'dir_sm' / 'droppedsm_c'),
+    ):
+        st = {item.name: item for item in iter_gitstatus(**kwargs)}
+        # we get no report on anything clean (implicitly also tests
+        # whether all item names are plain strings)
+        assert all(not i.name.endswith('_c') for i in st.values())
+
+        # anything untracked is labeled as such
+        assert all(
+            i.status == GitDiffStatus.other
+            # we would not see a submodule modification qualifier when instructed
+            # not to evaluate a submodule
+            or kwargs.get('eval_submodule_state') in ('no', 'commit')
+            or GitContainerModificationType.untracked_content in i.modification_types
+            for i in st.values()
+            if 'u' in i.path.name.split('_')[1]
+        )
+
+        # anything modified is labeled as such
+        assert all(
+            i.status == GitDiffStatus.modification
+            for i in st.values()
+            if 'm' in i.path.name.split('_')[1]
+        )
+
+        # anything deleted is labeled as such
+        assert all(
+            i.status == GitDiffStatus.deletion
+            for i in st.values()
+            if 'd' in i.path.name.split('_')[1]
+        )
+
+
+def test_status_invalid_params(existing_dataset):
+    ds = existing_dataset
+    with pytest.raises(ValueError):
+        list(iter_gitstatus(ds.pathobj, recursive='fromspace'))
+
+
+test_cases_repository_recursion = [
+    {'name': 'file_a', 'status': GitDiffStatus.addition},
+    {'name': 'dir_m/file_a', 'status': GitDiffStatus.addition},
+    {'name': 'file_u', 'status': GitDiffStatus.other},
+    {'name': 'dir_u/file_u', 'status': GitDiffStatus.other},
+    {'name': 'dir_m/file_u', 'status': GitDiffStatus.other},
+    {'name': 'dir_m/dir_u/file_u', 'status': GitDiffStatus.other},
+    {'name': 'file_d', 'status': GitDiffStatus.deletion},
+    {'name': 'dir_d/file_d', 'status': GitDiffStatus.deletion},
+    {'name': 'dir_m/file_d', 'status': GitDiffStatus.deletion},
+    {'name': 'file_m', 'status': GitDiffStatus.modification},
+    {'name': 'dir_m/file_m', 'status': GitDiffStatus.modification},
+    {'name': 'dir_sm/sm_d', 'status': GitDiffStatus.deletion},
+    {'name': 'dir_sm/sm_n', 'status': GitDiffStatus.modification,
+     'qual': (GitContainerModificationType.new_commits,)},
+    {'name': 'dir_sm/sm_m', 'status': GitDiffStatus.modification,
+     'qual': (GitContainerModificationType.modified_content,)},
+    {'name': 'dir_sm/sm_nm', 'status': GitDiffStatus.modification,
+     'qual': (GitContainerModificationType.modified_content,
+              GitContainerModificationType.new_commits)},
+    {'name': 'dir_sm/sm_nmu', 'status': GitDiffStatus.modification,
+     'qual': (GitContainerModificationType.modified_content,
+              GitContainerModificationType.untracked_content,
+              GitContainerModificationType.new_commits)},
+    {'name': 'dir_sm/sm_u', 'status': GitDiffStatus.modification,
+     'qual': (GitContainerModificationType.untracked_content,)},
+    {'name': 'dir_sm/sm_mu', 'status': GitDiffStatus.modification,
+     'qual': (GitContainerModificationType.modified_content,
+              GitContainerModificationType.untracked_content)},
+]
+
+test_cases_submodule_recursion = [
+    {'name': 'dir_sm/sm_m/file_a', 'status': GitDiffStatus.addition},
+    {'name': 'dir_sm/sm_nm/file_a', 'status': GitDiffStatus.addition},
+    {'name': 'dir_sm/sm_mu/file_a', 'status': GitDiffStatus.addition},
+    {'name': 'dir_sm/sm_nmu/file_a', 'status': GitDiffStatus.addition},
+    {'name': 'dir_sm/sm_m/file_m', 'status': GitDiffStatus.modification},
+    {'name': 'dir_sm/sm_mu/file_m', 'status': GitDiffStatus.modification},
+    {'name': 'dir_sm/sm_nmu/file_m', 'status': GitDiffStatus.modification},
+    {'name': 'dir_sm/sm_u/file_u', 'status': GitDiffStatus.other},
+    {'name': 'dir_sm/sm_mu/file_u', 'status': GitDiffStatus.other},
+    {'name': 'dir_sm/sm_nmu/file_u', 'status': GitDiffStatus.other},
+    {'name': 'dir_sm/sm_u/dir_u/file_u', 'status': GitDiffStatus.other},
+    {'name': 'dir_sm/sm_mu/dir_u/file_u', 'status': GitDiffStatus.other},
+    {'name': 'dir_sm/sm_nmu/dir_u/file_u', 'status': GitDiffStatus.other},
+]
+
+
+def _assert_testcases(st, tc):
+    for c in tc:
+        assert st[c['name']].status == c['status']
+        mod_types = st[c['name']].modification_types
+        if 'qual' in c:
+            assert set(mod_types) == set(c['qual'])
+        else:
+            assert mod_types is None
+
+
+def test_status_vs_git(status_playground):
+    """Implements a comparison against how git-status behaved when
+    the test was written  (see fixture docstring)
+    """
+    st = {
+        item.name: item
+        for item in iter_gitstatus(
+            path=status_playground.pathobj, recursive='repository',
+            eval_submodule_state='full', untracked='all',
+        )
+    }
+    _assert_testcases(st, test_cases_repository_recursion)
+
+
+def test_status_norec(status_playground):
+    st = {
+        item.name: item
+        for item in iter_gitstatus(
+            path=status_playground.pathobj, recursive='no',
+            eval_submodule_state='full', untracked='all',
+        )
+    }
+    test_cases = [
+        {'name': 'file_a', 'status': GitDiffStatus.addition},
+        {'name': 'dir_d', 'status': GitDiffStatus.deletion},
+        {'name': 'dir_m', 'status': GitDiffStatus.modification,
+         'qual': (GitContainerModificationType.modified_content,
+                  GitContainerModificationType.untracked_content)},
+        {'name': 'dir_sm', 'status': GitDiffStatus.modification,
+         'qual': (GitContainerModificationType.modified_content,
+                  GitContainerModificationType.untracked_content)},
+        {'name': 'file_d', 'status': GitDiffStatus.deletion},
+        {'name': 'file_m', 'status': GitDiffStatus.modification},
+        {'name': 'dir_u', 'status': GitDiffStatus.other},
+        {'name': 'file_u', 'status': GitDiffStatus.other},
+    ]
+    _assert_testcases(st, test_cases)
+
+
+def test_status_smrec(status_playground):
+    st = {
+        item.name: item
+        for item in iter_gitstatus(
+            path=status_playground.pathobj, recursive='submodules',
+            eval_submodule_state='full', untracked='all',
+        )
+    }
+    # in this mode we expect ALL results of a 'repository' mode recursion,
+    # including the submodule-type items, plus additional ones from within
+    # the submodules
+    _assert_testcases(st, chain(test_cases_repository_recursion,
+                                test_cases_submodule_recursion))
+
+
+def test_status_monorec(status_playground):
+    st = {
+        item.name: item
+        for item in iter_gitstatus(
+            path=status_playground.pathobj, recursive='monolithic',
+            eval_submodule_state='full', untracked='all',
+        )
+    }
+    # in this mode we expect the results of a 'repository' mode recursion,
+    # excluding the submodule-type items, plus additional ones from within
+    # the submodules
+    _assert_testcases(
+        st,
+        # repository and recursive test cases, minus any direct submodule
+        # items
+        [c for c in chain(test_cases_repository_recursion,
+                          test_cases_submodule_recursion)
+         if not c['name'].split('/')[-1].split('_')[0] == 'sm'])
+
+
+def test_status_gitinit(tmp_path):
+    # initialize a fresh git repo, but make no commits
+    assert call_git_success(['init'], cwd=tmp_path)
+    for recmode in ('no', 'repository', 'submodules'):
+        assert [] == list(iter_gitstatus(tmp_path, recursive=recmode))
+    # untracked reporting must be working normally
+    (tmp_path / 'untracked').touch()
+    for recmode in ('no', 'repository', 'submodules'):
+        res = list(iter_gitstatus(tmp_path, recursive=recmode))
+        assert len(res) == 1
+        assert res[0].name == 'untracked'
+        assert res[0].status == GitDiffStatus.other
diff --git a/docs/source/api.rst b/docs/source/api.rst
index bd4fa567c..f75f34a2b 100644
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@@ -9,4 +9,5 @@ High-level API commands
    credentials
    download
    ls_file_collection
+   next_status
    tree
diff --git a/docs/source/cmd.rst b/docs/source/cmd.rst
index ce3d3a4c3..3bae9c82c 100644
--- a/docs/source/cmd.rst
+++ b/docs/source/cmd.rst
@@ -8,4 +8,5 @@ Command line reference
    generated/man/datalad-credentials
    generated/man/datalad-download
    generated/man/datalad-ls-file-collection
+   generated/man/datalad-next-status
    generated/man/datalad-tree