All of lore.kernel.org
 help / color / mirror / Atom feed
From: git@jeffhostetler.com
To: git@vger.kernel.org
Cc: gitster@pobox.com, peff@peff.net,
	Jeff Hostetler <jeffhost@microsoft.com>
Subject: [PATCH v11 2/5] p0006-read-tree-checkout: perf test to time read-tree
Date: Mon, 17 Apr 2017 21:37:31 +0000	[thread overview]
Message-ID: <20170417213734.55373-3-git@jeffhostetler.com> (raw)
In-Reply-To: <20170417213734.55373-1-git@jeffhostetler.com>

From: Jeff Hostetler <jeffhost@microsoft.com>

Created t/perf/repos/many-files.sh to generate large, but
artificial repositories.

Created t/perf/p0006-read-tree-checkout.sh to measure
performance on various read-tree, checkout, and update-index
operations.  This test can run using either artificial repos
described above or normal repos.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 t/perf/p0006-read-tree-checkout.sh |  67 ++++++++++++++++++++++
 t/perf/repos/.gitignore            |   1 +
 t/perf/repos/many-files.sh         | 110 +++++++++++++++++++++++++++++++++++++
 3 files changed, 178 insertions(+)
 create mode 100755 t/perf/p0006-read-tree-checkout.sh
 create mode 100644 t/perf/repos/.gitignore
 create mode 100755 t/perf/repos/many-files.sh

diff --git a/t/perf/p0006-read-tree-checkout.sh b/t/perf/p0006-read-tree-checkout.sh
new file mode 100755
index 0000000..78cc23f
--- /dev/null
+++ b/t/perf/p0006-read-tree-checkout.sh
@@ -0,0 +1,67 @@
+#!/bin/sh
+#
+# This test measures the performance of various read-tree
+# and checkout operations.  It is primarily interested in
+# the algorithmic costs of index operations and recursive
+# tree traversal -- and NOT disk I/O on thousands of files.
+
+test_description="Tests performance of read-tree"
+
+. ./perf-lib.sh
+
+test_perf_default_repo
+
+# If the test repo was generated by ./repos/many-files.sh
+# then we know something about the data shape and branches,
+# so we can isolate testing to the ballast-related commits
+# and setup sparse-checkout so we don't have to populate
+# the ballast files and directories.
+#
+# Otherwise, we make some general assumptions about the
+# repo and consider the entire history of the current
+# branch to be the ballast.
+
+test_expect_success "setup repo" '
+	if git rev-parse --verify refs/heads/p0006-ballast^{commit}
+	then
+		echo Assuming synthetic repo from many-files.sh
+		git branch br_base            master
+		git branch br_ballast         p0006-ballast^
+		git branch br_ballast_alias   p0006-ballast^
+		git branch br_ballast_plus_1  p0006-ballast
+		git config --local core.sparsecheckout 1
+		cat >.git/info/sparse-checkout <<-EOF
+		/*
+		!ballast/*
+		EOF
+	else
+		echo Assuming non-synthetic repo...
+		git branch br_base            $(git rev-list HEAD | tail -n 1)
+		git branch br_ballast         HEAD^ || error "no ancestor commit from current head"
+		git branch br_ballast_alias   HEAD^
+		git branch br_ballast_plus_1  HEAD
+	fi &&
+	git checkout -q br_ballast &&
+	nr_files=$(git ls-files | wc -l)
+'
+
+test_perf "read-tree br_base br_ballast ($nr_files)" '
+	git read-tree -m br_base br_ballast -n
+'
+
+test_perf "switch between br_base br_ballast ($nr_files)" '
+	git checkout -q br_base &&
+	git checkout -q br_ballast
+'
+
+test_perf "switch between br_ballast br_ballast_plus_1 ($nr_files)" '
+	git checkout -q br_ballast_plus_1 &&
+	git checkout -q br_ballast
+'
+
+test_perf "switch between aliases ($nr_files)" '
+	git checkout -q br_ballast_alias &&
+	git checkout -q br_ballast
+'
+
+test_done
diff --git a/t/perf/repos/.gitignore b/t/perf/repos/.gitignore
new file mode 100644
index 0000000..72e3dc3
--- /dev/null
+++ b/t/perf/repos/.gitignore
@@ -0,0 +1 @@
+gen-*/
diff --git a/t/perf/repos/many-files.sh b/t/perf/repos/many-files.sh
new file mode 100755
index 0000000..5a1d25e
--- /dev/null
+++ b/t/perf/repos/many-files.sh
@@ -0,0 +1,110 @@
+#!/bin/sh
+## Generate test data repository using the given parameters.
+## When omitted, we create "gen-many-files-d-w-f.git".
+##
+## Usage: [-r repo] [-d depth] [-w width] [-f files]
+##
+## -r repo: path to the new repo to be generated
+## -d depth: the depth of sub-directories
+## -w width: the number of sub-directories at each level
+## -f files: the number of files created in each directory
+##
+## Note that all files will have the same SHA-1 and each
+## directory at a level will have the same SHA-1, so we
+## will potentially have a large index, but not a large
+## ODB.
+##
+## Ballast will be created under "ballast/".
+
+EMPTY_BLOB=e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
+
+set -e
+
+## (5, 10, 9) will create 999,999 ballast files.
+## (4, 10, 9) will create  99,999 ballast files.
+depth=5
+width=10
+files=9
+
+while test "$#" -ne 0
+do
+    case "$1" in
+	-r)
+	    shift;
+	    test "$#" -ne 0 || { echo 'error: -r requires an argument' >&2; exit 1; }
+	    repo=$1;
+	    shift ;;
+	-d)
+	    shift;
+	    test "$#" -ne 0 || { echo 'error: -d requires an argument' >&2; exit 1; }
+	    depth=$1;
+	    shift ;;
+	-w)
+	    shift;
+	    test "$#" -ne 0 || { echo 'error: -w requires an argument' >&2; exit 1; }
+	    width=$1;
+	    shift ;;
+	-f)
+	    shift;
+	    test "$#" -ne 0 || { echo 'error: -f requires an argument' >&2; exit 1; }
+	    files=$1;
+	    shift ;;
+	*)
+	    echo "error: unknown option '$1'" >&2; exit 1 ;;
+	esac
+done
+
+## Inflate the index with thousands of empty files.
+## usage: dir depth width files
+fill_index() {
+	awk -v arg_dir=$1 -v arg_depth=$2 -v arg_width=$3 -v arg_files=$4 '
+		function make_paths(dir, depth, width, files, f, w) {
+			for (f = 1; f <= files; f++) {
+				print dir "/file" f
+			}
+			if (depth > 0) {
+				for (w = 1; w <= width; w++) {
+					make_paths(dir "/dir" w, depth - 1, width, files)
+				}
+			}
+		}
+		END { make_paths(arg_dir, arg_depth, arg_width, arg_files) }
+		' </dev/null |
+	sed "s/^/100644 $EMPTY_BLOB	/" |
+	git update-index --index-info
+	return 0
+}
+
+[ -z "$repo" ] && repo=gen-many-files-$depth.$width.$files.git
+
+mkdir $repo
+cd $repo
+git init .
+
+## Create an initial commit just to define master.
+touch many-files.empty
+echo "$depth $width $files" >many-files.params
+git add many-files.*
+git commit -q -m params
+
+## Create ballast for p0006 based upon the given params and
+## inflate the index with thousands of empty files and commit.
+git checkout -b p0006-ballast
+fill_index "ballast" $depth $width $files
+git commit -q -m "ballast"
+
+nr_files=$(git ls-files | wc -l)
+
+## Modify 1 file and commit.
+echo "$depth $width $files" >>many-files.params
+git add many-files.params
+git commit -q -m "ballast plus 1"
+
+## Checkout master to put repo in canonical state (because
+## the perf test may need to clone and enable sparse-checkout
+## before attempting to checkout a commit with the ballast
+## (because it may contain 100K directories and 1M files)).
+git checkout master
+
+echo "Repository "$repo" ($depth, $width, $files) created.  Ballast $nr_files."
+exit 0
-- 
2.9.3


  parent reply	other threads:[~2017-04-17 21:37 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-04-17 21:37 [PATCH v11 0/5] read-cache: speed up add_index_entry git
2017-04-17 21:37 ` [PATCH v11 1/5] read-cache: add strcmp_offset function git
2017-04-17 21:37 ` git [this message]
2017-04-18 21:40   ` [PATCH v11 2/5] p0006-read-tree-checkout: perf test to time read-tree Thomas Gummerer
2017-04-19  1:25     ` Jeff King
2017-04-17 21:37 ` [PATCH v11 3/5] read-cache: speed up add_index_entry during checkout git
2017-04-17 21:37 ` [PATCH v11 4/5] read-cache: speed up has_dir_name (part 1) git
2017-04-17 21:37 ` [PATCH v11 5/5] read-cache: speed up has_dir_name (part 2) git

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170417213734.55373-3-git@jeffhostetler.com \
    --to=git@jeffhostetler.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=jeffhost@microsoft.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.