Skip to content

Commit

Permalink
path-walk: reorder object visits
Browse files Browse the repository at this point in the history
The path-walk API currently uses a stack-based approach to recursing
through the list of paths within the repository. This guarantees that
after a tree path is explored, all paths contained within that tree path
will be explored before continuing to explore siblings of that tree
path.

The initial motivation of this depth-first approach was to minimize
memory pressure while exploring the repository. A breadth-first approach
would have too many "active" paths being stored in the paths_to_lists
map.

We can take this approach one step further by making sure that blob
paths are visited before tree paths. This allows the API to free the
memory for these blob objects before continuing to perform the
depth-first search. This modifies the order in which we visit siblings,
but does not change the fact that we are performing depth-first search.

To achieve this goal, use a priority queue with a custom sorting method.
The sort needs to handle tags, blobs, and trees (commits are handled
slightly differently). When objects share a type, we can sort by path
name. This will keep children of the latest path to leave the stack be
preferred over the rest of the paths in the stack, since they agree in
prefix up to and including a directory separator. When the types are
different, we can prefer tags over other types and blobs over trees.

This causes significant adjustments to t6601-path-walk.sh to rearrange
the order of the visited paths.

Signed-off-by: Derrick Stolee <stolee@gmail.com>
  • Loading branch information
derrickstolee committed Dec 18, 2024
1 parent f2ffc32 commit ef54342
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 74 deletions.
60 changes: 48 additions & 12 deletions path-walk.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "list-objects.h"
#include "object.h"
#include "oid-array.h"
#include "prio-queue.h"
#include "revision.h"
#include "string-list.h"
#include "strmap.h"
Expand Down Expand Up @@ -49,24 +50,58 @@ struct path_walk_context {
struct strmap paths_to_lists;

/**
* Store the current list of paths in a stack, to
* facilitate depth-first-search without recursion.
* Store the current list of paths in a priority queue,
* using object type as a sorting mechanism, mostly to
* make sure blobs are popped off the stack first. No
* other sort is made, so within each object type it acts
* like a stack and performs a DFS within the trees.
*
* Use path_stack_pushed to indicate whether a path
* was previously added to path_stack.
*/
struct string_list path_stack;
struct prio_queue path_stack;
struct strset path_stack_pushed;
};

static int compare_by_type(const void *one, const void *two, void *cb_data)
{
struct type_and_oid_list *list1, *list2;
const char *str1 = one;
const char *str2 = two;
struct path_walk_context *ctx = cb_data;

list1 = strmap_get(&ctx->paths_to_lists, str1);
list2 = strmap_get(&ctx->paths_to_lists, str2);

/*
* If object types are equal, then use path comparison.
*/
if (!list1 || !list2 || list1->type == list2->type)
return strcmp(str1, str2);

/* Prefer tags to be popped off first. */
if (list1->type == OBJ_TAG)
return -1;
if (list2->type == OBJ_TAG)
return 1;

/* Prefer blobs to be popped off second. */
if (list1->type == OBJ_BLOB)
return -1;
if (list2->type == OBJ_BLOB)
return 1;

return 0;
}

static void push_to_stack(struct path_walk_context *ctx,
const char *path)
{
if (strset_contains(&ctx->path_stack_pushed, path))
return;

strset_add(&ctx->path_stack_pushed, path);
string_list_append(&ctx->path_stack, path);
prio_queue_put(&ctx->path_stack, xstrdup(path));
}

static int add_tree_entries(struct path_walk_context *ctx,
Expand Down Expand Up @@ -378,8 +413,8 @@ static int setup_pending_objects(struct path_walk_info *info,
const char *tagged_blob_path = "/tagged-blobs";
tagged_blobs->type = OBJ_BLOB;
tagged_blobs->maybe_interesting = 1;
push_to_stack(ctx, tagged_blob_path);
strmap_put(&ctx->paths_to_lists, tagged_blob_path, tagged_blobs);
push_to_stack(ctx, tagged_blob_path);
} else {
oid_array_clear(&tagged_blobs->oids);
free(tagged_blobs);
Expand All @@ -390,8 +425,8 @@ static int setup_pending_objects(struct path_walk_info *info,
const char *tag_path = "/tags";
tags->type = OBJ_TAG;
tags->maybe_interesting = 1;
push_to_stack(ctx, tag_path);
strmap_put(&ctx->paths_to_lists, tag_path, tags);
push_to_stack(ctx, tag_path);
} else {
oid_array_clear(&tags->oids);
free(tags);
Expand All @@ -418,7 +453,10 @@ int walk_objects_by_path(struct path_walk_info *info)
.repo = info->revs->repo,
.revs = info->revs,
.info = info,
.path_stack = STRING_LIST_INIT_DUP,
.path_stack = {
.compare = compare_by_type,
.cb_data = &ctx
},
.path_stack_pushed = STRSET_INIT,
.paths_to_lists = STRMAP_INIT
};
Expand Down Expand Up @@ -504,8 +542,7 @@ int walk_objects_by_path(struct path_walk_info *info)

trace2_region_enter("path-walk", "path-walk", info->revs->repo);
while (!ret && ctx.path_stack.nr) {
char *path = ctx.path_stack.items[ctx.path_stack.nr - 1].string;
ctx.path_stack.nr--;
char *path = prio_queue_get(&ctx.path_stack);
paths_nr++;

ret = walk_path(&ctx, path);
Expand All @@ -522,8 +559,7 @@ int walk_objects_by_path(struct path_walk_info *info)
push_to_stack(&ctx, entry->key);

while (!ret && ctx.path_stack.nr) {
char *path = ctx.path_stack.items[ctx.path_stack.nr - 1].string;
ctx.path_stack.nr--;
char *path = prio_queue_get(&ctx.path_stack);
paths_nr++;

ret = walk_path(&ctx, path);
Expand All @@ -537,7 +573,7 @@ int walk_objects_by_path(struct path_walk_info *info)

clear_paths_to_lists(&ctx.paths_to_lists);
strset_clear(&ctx.path_stack_pushed);
string_list_clear(&ctx.path_stack, 0);
clear_prio_queue(&ctx.path_stack);
return ret;
}

Expand Down
124 changes: 62 additions & 62 deletions t/t6601-path-walk.sh
Original file line number Diff line number Diff line change
Expand Up @@ -84,20 +84,20 @@ test_expect_success 'all' '
3:tree::$(git rev-parse refs/tags/tree-tag^{})
3:tree::$(git rev-parse refs/tags/tree-tag2^{})
4:blob:a:$(git rev-parse base~2:a)
5:tree:right/:$(git rev-parse topic:right)
5:tree:right/:$(git rev-parse base~1:right)
5:tree:right/:$(git rev-parse base~2:right)
6:blob:right/d:$(git rev-parse base~1:right/d)
7:blob:right/c:$(git rev-parse base~2:right/c)
7:blob:right/c:$(git rev-parse topic:right/c)
8:tree:left/:$(git rev-parse base:left)
8:tree:left/:$(git rev-parse base~2:left)
9:blob:left/b:$(git rev-parse base~2:left/b)
9:blob:left/b:$(git rev-parse base:left/b)
10:tree:a/:$(git rev-parse base:a)
11:blob:file2:$(git rev-parse refs/tags/tree-tag2^{}:file2)
12:tree:child/:$(git rev-parse refs/tags/tree-tag:child)
13:blob:child/file:$(git rev-parse refs/tags/tree-tag:child/file)
5:blob:file2:$(git rev-parse refs/tags/tree-tag2^{}:file2)
6:tree:a/:$(git rev-parse base:a)
7:tree:child/:$(git rev-parse refs/tags/tree-tag:child)
8:blob:child/file:$(git rev-parse refs/tags/tree-tag:child/file)
9:tree:left/:$(git rev-parse base:left)
9:tree:left/:$(git rev-parse base~2:left)
10:blob:left/b:$(git rev-parse base~2:left/b)
10:blob:left/b:$(git rev-parse base:left/b)
11:tree:right/:$(git rev-parse topic:right)
11:tree:right/:$(git rev-parse base~1:right)
11:tree:right/:$(git rev-parse base~2:right)
12:blob:right/c:$(git rev-parse base~2:right/c)
12:blob:right/c:$(git rev-parse topic:right/c)
13:blob:right/d:$(git rev-parse base~1:right/d)
blobs:10
commits:4
tags:7
Expand Down Expand Up @@ -154,19 +154,19 @@ test_expect_success 'branches and indexed objects mix well' '
1:tree::$(git rev-parse base^{tree})
1:tree::$(git rev-parse base~1^{tree})
1:tree::$(git rev-parse base~2^{tree})
2:blob:a:$(git rev-parse base~2:a)
3:tree:right/:$(git rev-parse topic:right)
3:tree:right/:$(git rev-parse base~1:right)
3:tree:right/:$(git rev-parse base~2:right)
4:blob:right/d:$(git rev-parse base~1:right/d)
4:blob:right/d:$(git rev-parse :right/d)
5:blob:right/c:$(git rev-parse base~2:right/c)
5:blob:right/c:$(git rev-parse topic:right/c)
6:tree:left/:$(git rev-parse base:left)
6:tree:left/:$(git rev-parse base~2:left)
7:blob:left/b:$(git rev-parse base:left/b)
7:blob:left/b:$(git rev-parse base~2:left/b)
8:tree:a/:$(git rev-parse refs/tags/third:a)
2:tree:a/:$(git rev-parse refs/tags/third:a)
3:tree:left/:$(git rev-parse base:left)
3:tree:left/:$(git rev-parse base~2:left)
4:blob:left/b:$(git rev-parse base:left/b)
4:blob:left/b:$(git rev-parse base~2:left/b)
5:tree:right/:$(git rev-parse topic:right)
5:tree:right/:$(git rev-parse base~1:right)
5:tree:right/:$(git rev-parse base~2:right)
6:blob:right/c:$(git rev-parse base~2:right/c)
6:blob:right/c:$(git rev-parse topic:right/c)
7:blob:right/d:$(git rev-parse base~1:right/d)
7:blob:right/d:$(git rev-parse :right/d)
8:blob:a:$(git rev-parse base~2:a)
blobs:7
commits:4
tags:0
Expand All @@ -186,15 +186,15 @@ test_expect_success 'topic only' '
1:tree::$(git rev-parse topic^{tree})
1:tree::$(git rev-parse base~1^{tree})
1:tree::$(git rev-parse base~2^{tree})
2:tree:right/:$(git rev-parse topic:right)
2:tree:right/:$(git rev-parse base~1:right)
2:tree:right/:$(git rev-parse base~2:right)
3:blob:right/d:$(git rev-parse base~1:right/d)
4:blob:right/c:$(git rev-parse base~2:right/c)
4:blob:right/c:$(git rev-parse topic:right/c)
5:tree:left/:$(git rev-parse base~2:left)
6:blob:left/b:$(git rev-parse base~2:left/b)
7:blob:a:$(git rev-parse base~2:a)
2:blob:a:$(git rev-parse base~2:a)
3:tree:left/:$(git rev-parse base~2:left)
4:blob:left/b:$(git rev-parse base~2:left/b)
5:tree:right/:$(git rev-parse topic:right)
5:tree:right/:$(git rev-parse base~1:right)
5:tree:right/:$(git rev-parse base~2:right)
6:blob:right/c:$(git rev-parse base~2:right/c)
6:blob:right/c:$(git rev-parse topic:right/c)
7:blob:right/d:$(git rev-parse base~1:right/d)
blobs:5
commits:3
tags:0
Expand All @@ -210,12 +210,12 @@ test_expect_success 'topic, not base' '
cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
1:tree::$(git rev-parse topic^{tree})
2:tree:right/:$(git rev-parse topic:right)
3:blob:right/d:$(git rev-parse topic:right/d):UNINTERESTING
4:blob:right/c:$(git rev-parse topic:right/c)
5:tree:left/:$(git rev-parse topic:left):UNINTERESTING
6:blob:left/b:$(git rev-parse topic:left/b):UNINTERESTING
7:blob:a:$(git rev-parse topic:a):UNINTERESTING
2:blob:a:$(git rev-parse topic:a):UNINTERESTING
3:tree:left/:$(git rev-parse topic:left):UNINTERESTING
4:blob:left/b:$(git rev-parse topic:left/b):UNINTERESTING
5:tree:right/:$(git rev-parse topic:right)
6:blob:right/c:$(git rev-parse topic:right/c)
7:blob:right/d:$(git rev-parse topic:right/d):UNINTERESTING
blobs:4
commits:1
tags:0
Expand All @@ -233,12 +233,12 @@ test_expect_success 'fourth, blob-tag2, not base' '
1:tag:/tags:$(git rev-parse fourth)
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{})
3:tree::$(git rev-parse topic^{tree})
4:tree:right/:$(git rev-parse topic:right)
5:blob:right/d:$(git rev-parse base~1:right/d):UNINTERESTING
6:blob:right/c:$(git rev-parse topic:right/c)
7:tree:left/:$(git rev-parse base~1:left):UNINTERESTING
8:blob:left/b:$(git rev-parse base~1:left/b):UNINTERESTING
9:blob:a:$(git rev-parse base~1:a):UNINTERESTING
4:blob:a:$(git rev-parse base~1:a):UNINTERESTING
5:tree:left/:$(git rev-parse base~1:left):UNINTERESTING
6:blob:left/b:$(git rev-parse base~1:left/b):UNINTERESTING
7:tree:right/:$(git rev-parse topic:right)
8:blob:right/c:$(git rev-parse topic:right/c)
9:blob:right/d:$(git rev-parse base~1:right/d):UNINTERESTING
blobs:5
commits:1
tags:1
Expand All @@ -253,10 +253,10 @@ test_expect_success 'topic, not base, only blobs' '
-- topic --not base >out &&
cat >expect <<-EOF &&
0:blob:right/d:$(git rev-parse topic:right/d):UNINTERESTING
1:blob:right/c:$(git rev-parse topic:right/c)
2:blob:left/b:$(git rev-parse topic:left/b):UNINTERESTING
3:blob:a:$(git rev-parse topic:a):UNINTERESTING
0:blob:a:$(git rev-parse topic:a):UNINTERESTING
1:blob:left/b:$(git rev-parse topic:left/b):UNINTERESTING
2:blob:right/c:$(git rev-parse topic:right/c)
3:blob:right/d:$(git rev-parse topic:right/d):UNINTERESTING
blobs:4
commits:0
tags:0
Expand Down Expand Up @@ -289,8 +289,8 @@ test_expect_success 'topic, not base, only trees' '
cat >expect <<-EOF &&
0:tree::$(git rev-parse topic^{tree})
1:tree:right/:$(git rev-parse topic:right)
2:tree:left/:$(git rev-parse topic:left):UNINTERESTING
1:tree:left/:$(git rev-parse topic:left):UNINTERESTING
2:tree:right/:$(git rev-parse topic:right)
commits:0
blobs:0
tags:0
Expand All @@ -308,14 +308,14 @@ test_expect_success 'topic, not base, boundary' '
0:commit::$(git rev-parse base~1):UNINTERESTING
1:tree::$(git rev-parse topic^{tree})
1:tree::$(git rev-parse base~1^{tree}):UNINTERESTING
2:tree:right/:$(git rev-parse topic:right)
2:tree:right/:$(git rev-parse base~1:right):UNINTERESTING
3:blob:right/d:$(git rev-parse base~1:right/d):UNINTERESTING
4:blob:right/c:$(git rev-parse base~1:right/c):UNINTERESTING
4:blob:right/c:$(git rev-parse topic:right/c)
5:tree:left/:$(git rev-parse base~1:left):UNINTERESTING
6:blob:left/b:$(git rev-parse base~1:left/b):UNINTERESTING
7:blob:a:$(git rev-parse base~1:a):UNINTERESTING
2:blob:a:$(git rev-parse base~1:a):UNINTERESTING
3:tree:left/:$(git rev-parse base~1:left):UNINTERESTING
4:blob:left/b:$(git rev-parse base~1:left/b):UNINTERESTING
5:tree:right/:$(git rev-parse topic:right)
5:tree:right/:$(git rev-parse base~1:right):UNINTERESTING
6:blob:right/c:$(git rev-parse base~1:right/c):UNINTERESTING
6:blob:right/c:$(git rev-parse topic:right/c)
7:blob:right/d:$(git rev-parse base~1:right/d):UNINTERESTING
blobs:5
commits:2
tags:0
Expand Down

0 comments on commit ef54342

Please sign in to comment.