forked from git/git
-
Notifications
You must be signed in to change notification settings - Fork 137
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
backfill: basic functionality and tests
The default behavior of 'git backfill' is to fetch all missing blobs that are reachable from HEAD. Document and test this behavior. The implementation is a very simple use of the path-walk API, initializing the revision walk at HEAD to start the path-walk from all commits reachable from HEAD. Ignore the object arrays that correspond to tree entries, assuming that they are all present already. The path-walk API provides lists of objects in batches according to a common path, but that list could be very small. We want to balance the number of requests to the server with the ability to have the process interrupted with minimal repeated work to catch up in the next run. Based on some experiments (detailed in the next change), a minimum batch size of 50,000 is selected for the default. This batch size is a _minimum_. As the path-walk API emits lists of blob IDs, they are collected into a list of objects for a request to the server. When that list is at least the minimum batch size, then the request is sent to the server for the new objects. However, the list of blob IDs from the path-walk API could be much longer than the batch size. At this moment, it is unclear if there is a benefit to splitting the list when there are too many objects at the same path. Signed-off-by: Derrick Stolee <stolee@gmail.com>
- Loading branch information
1 parent
0300aa1
commit 5728dd2
Showing
4 changed files
with
221 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
#!/bin/sh

# Tests for running "git backfill" inside partial clones.
test_description='git backfill on partial clones'

# Pin the initial branch name; the tests below refer to "main" explicitly.
GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME

# Pull in the shared test library.
. ./test-lib.sh
|
||
# Populate the 'src' repository that every later clone is derived from.
#
# Six directories (the top level, a, a/b, a/b/c, d and d/e) each hold
# four files, and every file is written in two versions, so the history
# contains 6 * 4 * 2 = 48 distinct blobs spread over eight commits.
# The inner loop bails out with "return 1" so that a failure in any
# iteration fails the whole test instead of being masked by the loop.
test_expect_success 'setup repo for object creation' '
	git init src &&
	mkdir -p src/a/b/c src/d/e &&

	for i in 1 2
	do
		for n in 1 2 3 4
		do
			echo "Version $i of file $n" >src/file.$n.txt &&
			echo "Version $i of file a/$n" >src/a/file.$n.txt &&
			echo "Version $i of file a/b/$n" >src/a/b/file.$n.txt &&
			echo "Version $i of file a/b/c/$n" >src/a/b/c/file.$n.txt &&
			echo "Version $i of file d/$n" >src/d/file.$n.txt &&
			echo "Version $i of file d/e/$n" >src/d/e/file.$n.txt &&
			git -C src add . &&
			git -C src commit -m "Iteration $n" || return 1
		done
	done
'
|
||
# Produce 'srv.bare', a bare copy of 'src' that plays the role of the
# origin server for the partial clone.  Object filtering and requests for
# arbitrary object IDs are switched on in its repository-local config.
test_expect_success 'setup bare clone for server' '
	git clone --bare "file://$(pwd)/src" srv.bare &&
	for key in uploadpack.allowfilter uploadpack.allowanysha1inwant
	do
		git -C srv.bare config --local "$key" 1 || return 1
	done
'
|
||
# Take a blob-less partial clone of "srv.bare" and backfill it completely.
test_expect_success 'do partial clone 1, backfill gets all objects' '
	git clone --no-checkout --filter=blob:none --single-branch \
		--branch=main "file://$(pwd)/srv.bare" backfill1 &&

	# Without options, backfill fetches everything reachable from HEAD.
	GIT_TRACE2_EVENT="$(pwd)/backfill-file-trace" \
		git -C backfill1 backfill &&

	# The trace has to record the expected promisor fetch count.
	test_trace2_data promisor fetch_count 48 <backfill-file-trace &&

	# Nothing reachable from HEAD may be reported as missing any more.
	git -C backfill1 rev-list --quiet --objects --missing=print HEAD >revs2 &&
	test_line_count = 0 revs2
'
|
||
# The remaining tests talk to a local HTTP server.
. "$TEST_DIRECTORY/lib-httpd.sh"
start_httpd
|
||
# Publish a bare copy of 'src' under the HTTP document root and take a
# blob-less partial clone of it through the smart HTTP endpoint.
#
# The upload-pack settings are written with plain "git config" rather
# than test_config: test_config reverts its change when the current test
# finishes, but the server has to keep these settings for the following
# test, which backfills from it.  This also matches how the 'srv.bare'
# server is configured earlier in this script.
test_expect_success 'create a partial clone over HTTP' '
	SERVER="$HTTPD_DOCUMENT_ROOT_PATH/server" &&
	rm -rf "$SERVER" repo &&
	git clone --bare "file://$(pwd)/src" "$SERVER" &&
	git -C "$SERVER" config --local uploadpack.allowfilter 1 &&
	git -C "$SERVER" config --local uploadpack.allowanysha1inwant 1 &&

	git clone --no-checkout --filter=blob:none \
		"$HTTPD_URL/smart/server" backfill-http
'
|
||
# Backfill the HTTP partial clone and check that no object is left missing.
test_expect_success 'backfilling over HTTP succeeds' '
	GIT_TRACE2_EVENT="$(pwd)/backfill-http-trace" \
		git -C backfill-http backfill &&

	# The trace has to record the expected promisor fetch count.
	test_trace2_data promisor fetch_count 48 <backfill-http-trace &&

	# Hand every object ID reachable from any ref to cat-file; none of
	# them may be reported as missing.
	git -C backfill-http rev-list --objects --all >rev-list-out &&
	awk "{ print \$1 }" <rev-list-out >oids &&
	GIT_TRACE2_EVENT="$(pwd)/walk-trace" \
		git -C backfill-http cat-file --batch-check <oids >batch-out &&
	! grep missing batch-out
'
|
||
# NOTE: the last part of this script is only executed when httpd is
# available and enabled, so tests that are not httpd-specific must NOT be
# added here.

test_done