Skip to content

Commit

Permalink
path-walk: add prune_all_uninteresting option
Browse files Browse the repository at this point in the history
This option causes the path-walk API to act like the sparse tree-walk
algorithm implemented by mark_trees_uninteresting_sparse() in
list-objects.c.

Starting from the commits marked as UNINTERESTING, their root trees and
all objects reachable from those trees are UNINTERESTING, at least as we
walk path-by-path. When we reach a path where all objects associated
with that path are marked UNINTERESTING, then do not continue walking the
children of that path.

We need to be careful to pass the UNINTERESTING flag in a deep way on
the UNINTERESTING objects before we start the path-walk, or else the
depth-first search for the path-walk API may accidentally report some
objects as interesting.

Signed-off-by: Derrick Stolee <[email protected]>
  • Loading branch information
derrickstolee authored and dscho committed Dec 30, 2024
1 parent 22bf55a commit c828be5
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 13 deletions.
8 changes: 8 additions & 0 deletions Documentation/technical/api-path-walk.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,14 @@ commits are emitted.
While it is possible to walk only commits in this way, consumers would be
better off using the revision walk API instead.

`prune_all_uninteresting`::
By default, all reachable paths are emitted by the path-walk API.
This option allows consumers to declare that they are not
interested in paths where all included objects are marked with the
`UNINTERESTING` flag. This requires using the `boundary` option in
the revision walk so that the walk emits commits marked with the
`UNINTERESTING` flag.

Examples
--------

Expand Down
67 changes: 64 additions & 3 deletions path-walk.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ struct type_and_oid_list
{
enum object_type type;
struct oid_array oids;
int maybe_interesting;
};

#define TYPE_AND_OID_LIST_INIT { \
Expand Down Expand Up @@ -124,6 +125,8 @@ static int add_children(struct path_walk_context *ctx,
strmap_put(&ctx->paths_to_lists, path.buf, list);
string_list_append(&ctx->path_stack, path.buf);
}
if (!(o->flags & UNINTERESTING))
list->maybe_interesting = 1;
oid_array_append(&list->oids, &entry.oid);
}

Expand All @@ -145,6 +148,40 @@ static int walk_path(struct path_walk_context *ctx,

list = strmap_get(&ctx->paths_to_lists, path);

if (ctx->info->prune_all_uninteresting) {
/*
* This is true if all objects were UNINTERESTING
* when added to the list.
*/
if (!list->maybe_interesting)
return 0;

/*
* But it's still possible that the objects were set
* as UNINTERESTING after being added. Do a quick check.
*/
list->maybe_interesting = 0;
for (size_t i = 0;
!list->maybe_interesting && i < list->oids.nr;
i++) {
if (list->type == OBJ_TREE) {
struct tree *t = lookup_tree(ctx->repo,
&list->oids.oid[i]);
if (t && !(t->object.flags & UNINTERESTING))
list->maybe_interesting = 1;
} else {
struct blob *b = lookup_blob(ctx->repo,
&list->oids.oid[i]);
if (b && !(b->object.flags & UNINTERESTING))
list->maybe_interesting = 1;
}
}

/* We have confirmed that all objects are UNINTERESTING. */
if (!list->maybe_interesting)
return 0;
}

/* Evaluate function pointer on this data, if requested. */
if ((list->type == OBJ_TREE && ctx->info->trees) ||
(list->type == OBJ_BLOB && ctx->info->blobs))
Expand Down Expand Up @@ -187,7 +224,7 @@ static void clear_strmap(struct strmap *map)
int walk_objects_by_path(struct path_walk_info *info)
{
const char *root_path = "";
int ret = 0;
int ret = 0, has_uninteresting = 0;
size_t commits_nr = 0, paths_nr = 0;
struct commit *c;
struct type_and_oid_list *root_tree_list;
Expand All @@ -199,6 +236,7 @@ int walk_objects_by_path(struct path_walk_info *info)
.path_stack = STRING_LIST_INIT_DUP,
.paths_to_lists = STRMAP_INIT
};
struct oidset root_tree_set = OIDSET_INIT;

trace2_region_enter("path-walk", "commit-walk", info->revs->repo);

Expand All @@ -211,6 +249,7 @@ int walk_objects_by_path(struct path_walk_info *info)
/* Insert a single list for the root tree into the paths. */
CALLOC_ARRAY(root_tree_list, 1);
root_tree_list->type = OBJ_TREE;
root_tree_list->maybe_interesting = 1;
strmap_put(&ctx.paths_to_lists, root_path, root_tree_list);

/*
Expand Down Expand Up @@ -301,10 +340,17 @@ int walk_objects_by_path(struct path_walk_info *info)
oid = get_commit_tree_oid(c);
t = lookup_tree(info->revs->repo, oid);

if (t)
if (t) {
oidset_insert(&root_tree_set, oid);
oid_array_append(&root_tree_list->oids, oid);
else
} else {
warning("could not find tree %s", oid_to_hex(oid));
}

if (t && (c->object.flags & UNINTERESTING)) {
t->object.flags |= UNINTERESTING;
has_uninteresting = 1;
}
}

trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr);
Expand All @@ -317,6 +363,21 @@ int walk_objects_by_path(struct path_walk_info *info)
oid_array_clear(&commit_list->oids);
free(commit_list);

/*
* Before performing a DFS of our paths and emitting them as interesting,
* do a full walk of the trees to distribute the UNINTERESTING bit. Use
* the sparse algorithm if prune_all_uninteresting was set.
*/
if (has_uninteresting) {
trace2_region_enter("path-walk", "uninteresting-walk", info->revs->repo);
if (info->prune_all_uninteresting)
mark_trees_uninteresting_sparse(ctx.repo, &root_tree_set);
else
mark_trees_uninteresting_dense(ctx.repo, &root_tree_set);
trace2_region_leave("path-walk", "uninteresting-walk", info->revs->repo);
}
oidset_clear(&root_tree_set);

string_list_append(&ctx.path_stack, root_path);

trace2_region_enter("path-walk", "path-walk", info->revs->repo);
Expand Down
8 changes: 8 additions & 0 deletions path-walk.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,14 @@ struct path_walk_info {
int trees;
int blobs;
int tags;

/**
* When 'prune_all_uninteresting' is set and a path has all objects
* marked as UNINTERESTING, then the path-walk will not visit those
* objects. It will not call path_fn on those objects and will not
* walk the children of such trees.
*/
int prune_all_uninteresting;
};

#define PATH_WALK_INFO_INIT { \
Expand Down
10 changes: 8 additions & 2 deletions t/helper/test-path-walk.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,12 @@ static int emit_block(const char *path, struct oid_array *oids,
BUG("we do not understand this type");
}

for (size_t i = 0; i < oids->nr; i++)
printf("%s:%s:%s\n", typestr, path, oid_to_hex(&oids->oid[i]));
for (size_t i = 0; i < oids->nr; i++) {
struct object *o = lookup_unknown_object(the_repository,
&oids->oid[i]);
printf("%s:%s:%s%s\n", typestr, path, oid_to_hex(&oids->oid[i]),
o->flags & UNINTERESTING ? ":UNINTERESTING" : "");
}

return 0;
}
Expand All @@ -76,6 +80,8 @@ int cmd__path_walk(int argc, const char **argv)
N_("toggle inclusion of tag objects")),
OPT_BOOL(0, "trees", &info.trees,
N_("toggle inclusion of tree objects")),
OPT_BOOL(0, "prune", &info.prune_all_uninteresting,
N_("toggle pruning of uninteresting paths")),
OPT_END(),
};

Expand Down
40 changes: 32 additions & 8 deletions t/t6601-path-walk.sh
Original file line number Diff line number Diff line change
Expand Up @@ -229,19 +229,19 @@ test_expect_success 'topic, not base, boundary' '
cat >expect <<-EOF &&
COMMIT::$(git rev-parse topic)
COMMIT::$(git rev-parse base~1)
COMMIT::$(git rev-parse base~1):UNINTERESTING
commits:2
TREE::$(git rev-parse topic^{tree})
TREE::$(git rev-parse base~1^{tree})
TREE:left/:$(git rev-parse base~1:left)
TREE::$(git rev-parse base~1^{tree}):UNINTERESTING
TREE:left/:$(git rev-parse base~1:left):UNINTERESTING
TREE:right/:$(git rev-parse topic:right)
TREE:right/:$(git rev-parse base~1:right)
TREE:right/:$(git rev-parse base~1:right):UNINTERESTING
trees:5
BLOB:a:$(git rev-parse base~1:a)
BLOB:left/b:$(git rev-parse base~1:left/b)
BLOB:right/c:$(git rev-parse base~1:right/c)
BLOB:a:$(git rev-parse base~1:a):UNINTERESTING
BLOB:left/b:$(git rev-parse base~1:left/b):UNINTERESTING
BLOB:right/c:$(git rev-parse base~1:right/c):UNINTERESTING
BLOB:right/c:$(git rev-parse topic:right/c)
BLOB:right/d:$(git rev-parse base~1:right/d)
BLOB:right/d:$(git rev-parse base~1:right/d):UNINTERESTING
blobs:5
tags:0
EOF
Expand All @@ -252,4 +252,28 @@ test_expect_success 'topic, not base, boundary' '
test_cmp expect.sorted out.sorted
'

# Run the path-walk test helper with --prune over "--boundary topic --not base"
# and check what it emits. The expected output lists only the root path,
# right/ and right/c: each of those has at least one object without the
# :UNINTERESTING suffix, and the UNINTERESTING objects sharing those paths are
# still listed alongside. Both files are sorted before comparison, so the
# check does not depend on the order in which lines are emitted.
test_expect_success 'topic, not base, boundary with pruning' '
test-tool path-walk --prune -- --boundary topic --not base >out &&
cat >expect <<-EOF &&
COMMIT::$(git rev-parse topic)
COMMIT::$(git rev-parse base~1):UNINTERESTING
commits:2
TREE::$(git rev-parse topic^{tree})
TREE::$(git rev-parse base~1^{tree}):UNINTERESTING
TREE:right/:$(git rev-parse topic:right)
TREE:right/:$(git rev-parse base~1:right):UNINTERESTING
trees:4
BLOB:right/c:$(git rev-parse base~1:right/c):UNINTERESTING
BLOB:right/c:$(git rev-parse topic:right/c)
blobs:2
tags:0
EOF
sort expect >expect.sorted &&
sort out >out.sorted &&
test_cmp expect.sorted out.sorted
'

test_done

0 comments on commit c828be5

Please sign in to comment.