#
# This file is a log of investigations into non-obvious behaviors of "partial clone" repositories,
# using a (writable, throwaway) fork of the linux kernel as test repository.
#
# The OUTCOMES described here are for git 2.31.1 on fedora linux 33 as of 2021-06-03
#
# Each scenario below is a self-contained "&&" chain: it stops at the first failing step,
# so a failed clone never leads to commits/pushes against a half-created folder.
#

# -----
# SETUP
# -----

# Enable perf tracing, including child process details
export GIT_TRACE2_PERF=1

# Set up a suitable read & writable linux kernel remote URL
export REMOTE_URL="https://github.com/TaoK/linux.git"

# -------
# OBSERVATIONS:
# - repos without promisor packfiles run a "git rev-list" process as part of "fetch.c", right after the "remote_refs" and within the "consume_refs" labelled code areas
#   - these processes are generally very fast
# - these processes *don't run* if the packfiles containing the "tips"/refs shared/fetched from the remote are all promisor packfiles
# - these processes *run differently* if *any* of the packfiles containing the "tips"/refs shared/fetched from the remote are not promisor packfiles, but the remote is a promisor
#   - Specifically, the same child processes do run, but an extra "--exclude-promisor-objects" parameter is added.
# - these child processes, running with a promisor remote, run very fast if promisor packfiles are small
# - these same child processes run very *slow* if promisor packfiles are large
# - that slowness is not *proportional* to packfile size, but it is related; this relationship appears to be non-linear
# - that slowness is substantially increased, for example, by using a "loose" filter, and ending up with many or all blobs *not missing* from the promisor packfile
# - that slowness seems to depend exclusively on the size/content of the promisor packfiles; non-promisor packfiles "trigger" the behavior but do not appear to impact its speed
# -------

# Regular single-branch quite-shallow clone without checkout, followed by a regular fetch
# -> in the fetch, the "remote_refs" and "consume_refs" labelled areas spawn "git rev-list --objects --stdin --not --all --quiet --alternate-refs" processes
# -> these processes complete instantaneously in this context
# -> an equivalent process (same args) is invoked during clone, and labels progress as "Checking connectivity"
TESTFOLDER="linuxtest_$RANDOM" &&
  git clone "$REMOTE_URL" "$TESTFOLDER" --no-checkout --shallow-since=2018-01-01 &&
  git -C "$TESTFOLDER" fetch

# Fully blob-filtered single-branch quite-shallow clone without checkout, followed by a regular fetch
# -> "remote_refs" and "consume_refs" don't call "rev-list" child processes at all
TESTFOLDER="linuxtest_$RANDOM" &&
  git clone "$REMOTE_URL" "$TESTFOLDER" --no-checkout --shallow-since=2018-01-01 --filter=blob:none &&
  git -C "$TESTFOLDER" fetch

# Regular single-branch quite-shallow clone, with a blob-filtered "tip"
# (another repo is created just to move the tip for the promisor packfile to be created)
# -> "remote_refs" and "consume_refs" don't call "rev-list" child processes at all
TESTFOLDER="linuxtest_$RANDOM" &&
  TIP_MOVER_FOLDER="linuxtest_$RANDOM" &&
  git clone "$REMOTE_URL" "$TESTFOLDER" --no-checkout --shallow-since=2018-01-01 &&
  git clone "$REMOTE_URL" "$TIP_MOVER_FOLDER" --depth=1 &&
  echo "Something $RANDOM" > "$TIP_MOVER_FOLDER/test_file_$RANDOM" &&
  git -C "$TIP_MOVER_FOLDER" add -A &&
  git -C "$TIP_MOVER_FOLDER" commit --no-gpg-sign -m "test file commit" &&
  git -C "$TIP_MOVER_FOLDER" push origin HEAD &&
  git -C "$TESTFOLDER" config remote.origin.promisor true &&
  git -C "$TESTFOLDER" config remote.origin.partialclonefilter blob:none &&
  git -C "$TESTFOLDER" fetch &&
  git -C "$TESTFOLDER" fetch

# Regular single-branch quite-shallow clone, upgraded to a promisor remote
# -> "remote_refs" and "consume_refs" *do* call "rev-list" child processes, with an extra parameter
# -> these rev-list child processes complete very fast (presumably their speed relates to the volume of promisor packfiles in some way)
TESTFOLDER="linuxtest_$RANDOM" &&
  git clone "$REMOTE_URL" "$TESTFOLDER" --no-checkout --shallow-since=2018-01-01 &&
  git -C "$TESTFOLDER" config remote.origin.promisor true &&
  git -C "$TESTFOLDER" config remote.origin.partialclonefilter blob:none &&
  git -C "$TESTFOLDER" fetch

# Regular single-branch quite-shallow clone, with *short* blob-filtered "roots" added later
# (the deepening is a little chaotic, with lots of blobs being retrieved, presumably from the different shallow roots)
# -> the presence of small promisor packfiles (NOT at the tip) makes no difference; non-promisor packfiles at the tip trigger the rev-list call, it completes fast
TESTFOLDER="linuxtest_$RANDOM" &&
  git clone "$REMOTE_URL" "$TESTFOLDER" --no-checkout --shallow-since=2018-01-01 &&
  git -C "$TESTFOLDER" config remote.origin.promisor true &&
  git -C "$TESTFOLDER" config remote.origin.partialclonefilter blob:none &&
  git -C "$TESTFOLDER" fetch --shallow-since=2017-12-30 &&
  git -C "$TESTFOLDER" fetch

# Regular single-branch quite-shallow clone, with both a blob-filtered "tip" and a non-promisor-packfile "tip"
# (another repo is created just to move the tip for the promisor packfile to be created)
# -> the presence of small promisor packfiles (at the tip) makes no difference; non-promisor packfiles at the tip trigger the rev-list call, it completes fast
TESTFOLDER="linuxtest_$RANDOM" &&
  TIP_MOVER_FOLDER="linuxtest_$RANDOM" &&
  ORIGINAL_TIP_BRANCH="testbranch_$RANDOM" &&
  git clone "$REMOTE_URL" "$TESTFOLDER" --no-checkout --shallow-since=2018-01-01 &&
  git clone "$REMOTE_URL" "$TIP_MOVER_FOLDER" --depth=1 &&
  echo "Something $RANDOM" > "$TIP_MOVER_FOLDER/test_file_$RANDOM" &&
  git -C "$TIP_MOVER_FOLDER" add -A &&
  git -C "$TIP_MOVER_FOLDER" commit --no-gpg-sign -m "test file commit" &&
  git -C "$TIP_MOVER_FOLDER" push origin HEAD &&
  git -C "$TESTFOLDER" config --add remote.origin.fetch "+refs/heads/$ORIGINAL_TIP_BRANCH:refs/remotes/origin/$ORIGINAL_TIP_BRANCH" &&
  git -C "$TESTFOLDER" push origin "HEAD:refs/heads/$ORIGINAL_TIP_BRANCH" &&
  git -C "$TESTFOLDER" config remote.origin.promisor true &&
  git -C "$TESTFOLDER" config remote.origin.partialclonefilter blob:none &&
  git -C "$TESTFOLDER" fetch &&
  git -C "$TESTFOLDER" fetch

# Regular single-branch very-shallow clone, with full blob-filtered history added later
# -> the presence of *large* promisor packfiles (with non-promisor packfiles at the tip) means the rev-list call runs, and runs long...
# (size: 205MB + 1.09GB, clone & unshallow: 60s + 314s, fetch: 324s made up of 2X 162s rev-list)
TESTFOLDER="linuxtest_$RANDOM" &&
  git clone "$REMOTE_URL" "$TESTFOLDER" --no-checkout --depth=1 &&
  git -C "$TESTFOLDER" config remote.origin.promisor true &&
  git -C "$TESTFOLDER" config remote.origin.partialclonefilter blob:none &&
  git -C "$TESTFOLDER" fetch --unshallow &&
  git -C "$TESTFOLDER" fetch

# Regular single-branch very-shallow clone, with full barely-filtered history added later
# -> the presence of *huge* promisor packfiles (with non-promisor packfiles at the tip) means the rev-list call runs even longer
# (size: 205MB + 2.73GB, clone & unshallow: 60s + 1420s, fetch: 1900s made up of 2X 940s rev-list)
TESTFOLDER="linuxtest_$RANDOM" &&
  git clone "$REMOTE_URL" "$TESTFOLDER" --no-checkout --depth=1 &&
  git -C "$TESTFOLDER" config remote.origin.promisor true &&
  git -C "$TESTFOLDER" config remote.origin.partialclonefilter blob:limit=1m &&
  git -C "$TESTFOLDER" fetch --unshallow &&
  git -C "$TESTFOLDER" fetch

# Regular single-branch "deep" clone *of a very old ref*, tag "v2.6.13", and then full blob-filtered history *since then* added later
# -> the presence of *large* promisor packfiles (with non-promisor packfiles at the root) has exactly the same impact as with non-promisors at tip
# (size: 76MB + 1.09GB, clone & fetch: 9s + 265s, fetch: 320s made up of 2X 160s rev-list)
TESTFOLDER="linuxtest_$RANDOM" &&
  git clone "$REMOTE_URL" "$TESTFOLDER" --no-checkout -b v2.6.13 --single-branch &&
  git -C "$TESTFOLDER" config --add remote.origin.fetch "+refs/heads/master:refs/remotes/origin/master" &&
  git -C "$TESTFOLDER" config remote.origin.promisor true &&
  git -C "$TESTFOLDER" config remote.origin.partialclonefilter blob:none &&
  git -C "$TESTFOLDER" fetch &&
  git -C "$TESTFOLDER" fetch

# -------
# OBSERVATIONS:
# - running git repack on a fully-promisored repo can/will yield non-promisor packfiles (if there are loose objects from initially-local commits)
# - if/when such a locally-packed non-promisor packfile contains any "tip" commits for the promisor remote, then:
#   - fetch will be slow (depending on the size of promisor packfiles)
#   - this will continue until you manually mark the pack file as ".promisor" OR you get a later tip for the affected branch(es) from a promisor remote
# -------

#
# (clone: 180s, add: 70s warmup, fetch: 1s, repack: 173s, fetch: 340s from 2X rev-list at 170s)
# -> after repack, fetch is *very* slow until one of two corrective circumstances arise:
#    1. you make the stray packfile a promisor packfile, or
#    2. you fetch another change (to that branch) from the promisor remote / the tip of the branch "naturally" becomes a promisor again

# Step 1: filtered clone, local commit, push, and a first (fast) fetch.
TESTFOLDER="linuxtest_$RANDOM" &&
  git clone "$REMOTE_URL" "$TESTFOLDER" --single-branch --filter=blob:none &&
  echo "Something $RANDOM" > "$TESTFOLDER/test_file_$RANDOM" &&
  git -C "$TESTFOLDER" add -A &&
  git -C "$TESTFOLDER" commit --no-gpg-sign -m "test file commit" &&
  git -C "$TESTFOLDER" push &&
  git -C "$TESTFOLDER" fetch

# Step 2: repack, then fetch again (this is the slow fetch).
# Deliberately a separate command so it can be timed on its own; it reuses TESTFOLDER from step 1.
git -C "$TESTFOLDER" repack &&
  git -C "$TESTFOLDER" fetch

# Step 3: move the remote tip from a second throwaway clone (corrective circumstance 2 above).
FIXUPFOLDER="linuxtest_$RANDOM" &&
  git clone "$REMOTE_URL" "$FIXUPFOLDER" --depth=1 &&
  echo "Something $RANDOM" > "$FIXUPFOLDER/test_file_$RANDOM" &&
  git -C "$FIXUPFOLDER" add -A &&
  git -C "$FIXUPFOLDER" commit --no-gpg-sign -m "test file commit" &&
  git -C "$FIXUPFOLDER" push

# Step 4: fetch the new tip into the original test repo.
git -C "$TESTFOLDER" fetch

# -------
# OBSERVATIONS:
# - Under some specific circumstances, force-pushing a branch from a partial clone causes this repo
#   to *re-fetch the repo's commits & trees* into another new promisor packfile.
# - This can be repeated any number of times, yielding effectively identical (large) duplicate packfiles
# - The preconditions appear to be that the remote and the local repo each have mutually unknown commits at the tip...
# - The behavior is to re-download the commits & trees only, even if the filter settings on the repo are "lax" (e.g. "limit=1M")
# -------

# Get a fully filtered full clone, commit to the branch from elsewhere, and force the originally cloned branch state back
# -> during the forced push we randomly, strangely, re-retrieve the whole filtered clone data - commits + trees
TESTFOLDER="linuxtest_$RANDOM" &&
  INTERFERINGFOLDER="linuxtest_$RANDOM" &&
  git clone "$REMOTE_URL" "$INTERFERINGFOLDER" --depth=1 &&
  git clone "$REMOTE_URL" "$TESTFOLDER" --filter=blob:none &&
  git -C "$INTERFERINGFOLDER" commit --allow-empty --no-gpg-sign -m "test file commit" &&
  git -C "$INTERFERINGFOLDER" push &&
  git -C "$TESTFOLDER" push -f

# Confirm - there are two 1.09-GB packfiles
# ("ls -l" rather than the "ll" alias, which only exists in interactive shells that define it)
ls -l "$TESTFOLDER/.git/objects/pack"

# This can be repeated any number of times, each time creating yet another 1.09GB promisor packfile...
git -C "$INTERFERINGFOLDER" commit --allow-empty --no-gpg-sign -m "test file commit" &&
  git -C "$INTERFERINGFOLDER" push -f &&
  git -C "$TESTFOLDER" push -f

# Confirm - there are three 1.09-GB packfiles
ls -l "$TESTFOLDER/.git/objects/pack"

# This can be repeated any number of times, each time creating yet another 1.09GB promisor packfile...
git -C "$INTERFERINGFOLDER" commit --allow-empty --no-gpg-sign -m "test file commit" &&
  git -C "$INTERFERINGFOLDER" push -f &&
  git -C "$TESTFOLDER" push -f

# Confirm - there are four 1.09-GB packfiles
ls -l "$TESTFOLDER/.git/objects/pack"

# Again with a "loosely" filtered clone
# -> Even though the original clone size is 3GB, the later "duplicated" download is once again 1.09 GB - commits + trees only.
TESTFOLDER="linuxtest_$RANDOM" &&
  INTERFERINGFOLDER="linuxtest_$RANDOM" &&
  git clone "$REMOTE_URL" "$INTERFERINGFOLDER" --depth=1 &&
  git clone "$REMOTE_URL" "$TESTFOLDER" --filter=blob:limit=1M &&
  git -C "$INTERFERINGFOLDER" commit --allow-empty --no-gpg-sign -m "test file commit" &&
  git -C "$INTERFERINGFOLDER" push &&
  git -C "$TESTFOLDER" push -f

# ----
# OBSERVATIONS:
# - git repack is slow on filtered repos (repos with promisor packfiles)
# - git repack's speed is *directly related to the size of the promisor packfiles*
#   (but given that repos with a mix of promisor and non-promisor packfiles misbehave in other ways, this conclusion is of limited value)
# ----

# Baseline - regular full clone
# (size = 3.11GB, clone = 235s, repack = 10s)
TESTFOLDER="linuxtest_$RANDOM" &&
  git clone "$REMOTE_URL" "$TESTFOLDER" &&
  git -C "$TESTFOLDER" repack

# Baseline - very-shallow single-branch clone
# (size = 203MB, clone = 74s, repack = 0s)
TESTFOLDER="linuxtest_$RANDOM" &&
  git clone "$REMOTE_URL" "$TESTFOLDER" --depth=1 &&
  git -C "$TESTFOLDER" repack

# Demo - fully blob-filtered full clone, normal checkout
# (size = 1.09GB + 203MB, clone = 90s + 100s, repack = 180s)
TESTFOLDER="linuxtest_$RANDOM" &&
  git clone "$REMOTE_URL" "$TESTFOLDER" --filter=blob:none &&
  git -C "$TESTFOLDER" repack

# Demo - fully blob-filtered full clone, no checkout
# (size = 1.09GB, clone = 70s, repack = 160s)
TESTFOLDER="linuxtest_$RANDOM" &&
  git clone "$REMOTE_URL" "$TESTFOLDER" --filter=blob:none --no-checkout &&
  git -C "$TESTFOLDER" repack

# Demo - fully blob-filtered very-shallow single-branch clone, normal checkout
# (size = 203MB, clone = 150s, repack = 8s)
TESTFOLDER="linuxtest_$RANDOM" &&
  git clone "$REMOTE_URL" "$TESTFOLDER" --filter=blob:none --depth=1 &&
  git -C "$TESTFOLDER" repack

# Demo - fully blob-filtered very-shallow single-branch clone, no checkout
# (size = 2MB, clone = 2s, repack = 0s)
TESTFOLDER="linuxtest_$RANDOM" &&
  git clone "$REMOTE_URL" "$TESTFOLDER" --filter=blob:none --no-checkout --depth=1 &&
  git -C "$TESTFOLDER" repack

# Demo - barely blob-filtered single-branch clone, no checkout
# (size = 3.1GB, clone = 200s, repack = 810s)
TESTFOLDER="linuxtest_$RANDOM" &&
  git clone "$REMOTE_URL" "$TESTFOLDER" --filter=blob:limit=10m --single-branch --no-checkout &&
  git -C "$TESTFOLDER" repack

# Demo - medium-deep shallow single-branch clone (no checkout), followed by fully filtered unshallow
# (promisor packfile "in the past" of the single tip ref, with most history in the "tip" non-promisor packfile)
# (size = 2.63GB + 200MB, clone = 337s + 108s, repack = 28s)
# -> Repack time is directly related to promisor packfile size/content/scope when further back in history
TESTFOLDER="linuxtest_$RANDOM" &&
  git clone "$REMOTE_URL" "$TESTFOLDER" --no-checkout --shallow-since=2010-01-01 --single-branch &&
  git -C "$TESTFOLDER" config remote.origin.promisor true &&
  git -C "$TESTFOLDER" config remote.origin.partialclonefilter blob:none &&
  git -C "$TESTFOLDER" fetch --unshallow &&
  git -C "$TESTFOLDER" repack

# Demo - reasonably shallow single-branch clone (no checkout), followed by extra commit appearing at tip on the remote,
# followed by filtered fetch of that new tip
# (promisor packfile "at the tip", with most of the history in a non-promisor packfile "behind the tip")
# (size = 981MB + , clone = 2s, repack = 3s)
# (TIP_MOVER clone time ignored)
# -> Repack time is directly related to promisor packfile size/content/scope when at tip
TESTFOLDER="linuxtest_$RANDOM" &&
  TIP_MOVER_FOLDER="linuxtest_$RANDOM" &&
  git clone "$REMOTE_URL" "$TESTFOLDER" --no-checkout --shallow-since=2018-01-01 --single-branch &&
  git clone "$REMOTE_URL" "$TIP_MOVER_FOLDER" --depth=1 &&
  echo "Something $RANDOM" > "$TIP_MOVER_FOLDER/test_file_$RANDOM" &&
  git -C "$TIP_MOVER_FOLDER" add -A &&
  git -C "$TIP_MOVER_FOLDER" commit --no-gpg-sign -m "test file commit" &&
  git -C "$TIP_MOVER_FOLDER" push origin HEAD &&
  git -C "$TESTFOLDER" config remote.origin.promisor true &&
  git -C "$TESTFOLDER" config remote.origin.partialclonefilter blob:none &&
  git -C "$TESTFOLDER" fetch &&
  git -C "$TESTFOLDER" repack

# -------
# CLEANUP
# -------

# Delete all the weird repos created.
# The glob is resolved against the current directory, so run this from where the scenarios were run.
rm -rf -- linuxtest_*