All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jeff King <peff@peff.net>
To: git@vger.kernel.org
Cc: Junio C Hamano <gitster@pobox.com>
Subject: [PATCH v2 14/15] pack-bitmap: implement BLOB_LIMIT filtering
Date: Fri, 14 Feb 2020 13:22:39 -0500	[thread overview]
Message-ID: <20200214182239.GN150965@coredump.intra.peff.net> (raw)
In-Reply-To: <20200214182147.GA654525@coredump.intra.peff.net>

Just as the previous commit implemented BLOB_NONE, we can support
BLOB_LIMIT filters by looking at the sizes of any blobs in the result
and unsetting their bits as appropriate. This is slightly more expensive
than BLOB_NONE, but still produces a noticeable speedup (these results
are on git.git):

  Test                                         HEAD~2            HEAD
  ------------------------------------------------------------------------------------
  5310.9:  rev-list count with blob:none       1.80(1.77+0.02)   0.22(0.20+0.02) -87.8%
  5310.10: rev-list count with blob:limit=1k   1.99(1.96+0.03)   0.29(0.25+0.03) -85.4%

The implementation is similar to the BLOB_NONE one, with the exception
that we have to go object-by-object while walking the blob-type bitmap
(since we can't mask out the matches, but must look up the size
individually for each blob). The trick with using ctz64() is taken from
show_objects_for_type(), which likewise needs to find individual bits
(but wants to quickly skip over big chunks without blobs).

Signed-off-by: Jeff King <peff@peff.net>
---
 pack-bitmap.c                      | 80 ++++++++++++++++++++++++++++++
 t/perf/p5310-pack-bitmaps.sh       |  5 ++
 t/t6113-rev-list-bitmap-filters.sh | 20 +++++++-
 3 files changed, 104 insertions(+), 1 deletion(-)

diff --git a/pack-bitmap.c b/pack-bitmap.c
index dcf8a9aadf..9d680065e4 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -779,6 +779,78 @@ static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git,
 	bitmap_free(tips);
 }
 
+static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
+				     uint32_t pos)
+{
+	struct packed_git *pack = bitmap_git->pack;
+	unsigned long size;
+	struct object_info oi = OBJECT_INFO_INIT;
+
+	oi.sizep = &size;
+
+	if (pos < pack->num_objects) {
+		struct revindex_entry *entry = &pack->revindex[pos];
+		if (packed_object_info(the_repository, pack,
+				       entry->offset, &oi) < 0) {
+			struct object_id oid;
+			nth_packed_object_oid(&oid, pack, entry->nr);
+			die(_("unable to get size of %s"), oid_to_hex(&oid));
+		}
+	} else {
+		struct eindex *eindex = &bitmap_git->ext_index;
+		struct object *obj = eindex->objects[pos - pack->num_objects];
+		if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
+			die(_("unable to get size of %s"), oid_to_hex(&obj->oid));
+	}
+
+	return size;
+}
+
+static void filter_bitmap_blob_limit(struct bitmap_index *bitmap_git,
+				     struct object_list *tip_objects,
+				     struct bitmap *to_filter,
+				     unsigned long limit)
+{
+	struct eindex *eindex = &bitmap_git->ext_index;
+	struct bitmap *tips;
+	struct ewah_iterator it;
+	eword_t mask;
+	uint32_t i;
+
+	tips = find_tip_blobs(bitmap_git, tip_objects);
+
+	for (i = 0, init_type_iterator(&it, bitmap_git, OBJ_BLOB);
+	     i < to_filter->word_alloc && ewah_iterator_next(&mask, &it);
+	     i++) {
+		eword_t word = to_filter->words[i] & mask;
+		unsigned offset;
+
+		for (offset = 0; offset < BITS_IN_EWORD; offset++) {
+			uint32_t pos;
+
+			if ((word >> offset) == 0)
+				break;
+			offset += ewah_bit_ctz64(word >> offset);
+			pos = i * BITS_IN_EWORD + offset;
+
+			if (!bitmap_get(tips, pos) &&
+			    get_size_by_pos(bitmap_git, pos) >= limit)
+				bitmap_unset(to_filter, pos);
+		}
+	}
+
+	for (i = 0; i < eindex->count; i++) {
+		uint32_t pos = i + bitmap_git->pack->num_objects;
+		if (eindex->objects[i]->type == OBJ_BLOB &&
+		    bitmap_get(to_filter, pos) &&
+		    !bitmap_get(tips, pos) &&
+		    get_size_by_pos(bitmap_git, pos) >= limit)
+			bitmap_unset(to_filter, pos);
+	}
+
+	bitmap_free(tips);
+}
+
 static int filter_bitmap(struct bitmap_index *bitmap_git,
 			 struct object_list *tip_objects,
 			 struct bitmap *to_filter,
@@ -794,6 +866,14 @@ static int filter_bitmap(struct bitmap_index *bitmap_git,
 		return 0;
 	}
 
+	if (filter->choice == LOFC_BLOB_LIMIT) {
+		if (bitmap_git)
+			filter_bitmap_blob_limit(bitmap_git, tip_objects,
+						 to_filter,
+						 filter->blob_limit_value);
+		return 0;
+	}
+
 	/* filter choice not handled */
 	return -1;
 }
diff --git a/t/perf/p5310-pack-bitmaps.sh b/t/perf/p5310-pack-bitmaps.sh
index 936742314c..8b43a545c1 100755
--- a/t/perf/p5310-pack-bitmaps.sh
+++ b/t/perf/p5310-pack-bitmaps.sh
@@ -52,6 +52,11 @@ test_perf 'rev-list count with blob:none' '
 		--filter=blob:none >/dev/null
 '
 
+test_perf 'rev-list count with blob:limit=1k' '
+	git rev-list --use-bitmap-index --count --objects --all \
+		--filter=blob:limit=1k >/dev/null
+'
+
 test_expect_success 'create partial bitmap state' '
 	# pick a commit to represent the repo tip in the past
 	cutoff=$(git rev-list HEAD~100 -1) &&
diff --git a/t/t6113-rev-list-bitmap-filters.sh b/t/t6113-rev-list-bitmap-filters.sh
index f4e6d582f0..145603f124 100755
--- a/t/t6113-rev-list-bitmap-filters.sh
+++ b/t/t6113-rev-list-bitmap-filters.sh
@@ -6,8 +6,10 @@ test_description='rev-list combining bitmaps and filters'
 test_expect_success 'set up bitmapped repo' '
 	# one commit will have bitmaps, the other will not
 	test_commit one &&
+	test_commit much-larger-blob-one &&
 	git repack -adb &&
-	test_commit two
+	test_commit two &&
+	test_commit much-larger-blob-two
 '
 
 test_expect_success 'filters fallback to non-bitmap traversal' '
@@ -35,4 +37,20 @@ test_expect_success 'blob:none filter with specified blob' '
 	test_bitmap_traversal expect actual
 '
 
+test_expect_success 'blob:limit filter' '
+	git rev-list --objects --filter=blob:limit=5 HEAD >expect &&
+	git rev-list --use-bitmap-index \
+		     --objects --filter=blob:limit=5 HEAD >actual &&
+	test_bitmap_traversal expect actual
+'
+
+test_expect_success 'blob:limit filter with specified blob' '
+	git rev-list --objects --filter=blob:limit=5 \
+		     HEAD HEAD:much-larger-blob-two.t >expect &&
+	git rev-list --use-bitmap-index \
+		     --objects --filter=blob:limit=5 \
+		     HEAD HEAD:much-larger-blob-two.t >actual &&
+	test_bitmap_traversal expect actual
+'
+
 test_done
-- 
2.25.0.796.gcc29325708


  parent reply	other threads:[~2020-02-14 18:22 UTC|newest]

Thread overview: 73+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-13  2:15 [PATCH 0/13] combining object filters and bitmaps Jeff King
2020-02-13  2:16 ` [PATCH 01/13] pack-bitmap: factor out type iterator initialization Jeff King
2020-02-13 17:45   ` Junio C Hamano
2020-02-13  2:16 ` [PATCH 02/13] pack-bitmap: fix leak of haves/wants object lists Jeff King
2020-02-13 18:12   ` Junio C Hamano
2020-02-13  2:17 ` [PATCH 03/13] rev-list: fallback to non-bitmap traversal when filtering Jeff King
2020-02-13 18:19   ` Junio C Hamano
2020-02-13 18:40     ` Jeff King
2020-02-13  2:17 ` [PATCH 04/13] rev-list: consolidate bitmap-disabling options Jeff King
2020-02-13  2:18 ` [PATCH 05/13] rev-list: factor out bitmap-optimized routines Jeff King
2020-02-13 18:34   ` Junio C Hamano
2020-02-13  2:19 ` [PATCH 06/13] rev-list: make --count work with --objects Jeff King
2020-02-13 19:14   ` Junio C Hamano
2020-02-13 20:27     ` Jeff King
2020-02-13  2:20 ` [PATCH 07/13] rev-list: allow bitmaps when counting objects Jeff King
2020-02-13 21:47   ` Junio C Hamano
2020-02-13 22:27     ` Jeff King
2020-02-13  2:20 ` [PATCH 08/13] pack-bitmap: basic noop bitmap filter infrastructure Jeff King
2020-02-13  2:21 ` [PATCH 09/13] rev-list: use bitmap filters for traversal Jeff King
2020-02-13 22:22   ` Junio C Hamano
2020-02-13 22:34     ` Jeff King
2020-02-13  2:21 ` [PATCH 10/13] bitmap: add bitmap_unset() function Jeff King
2020-02-13  2:23 ` [PATCH 11/13] pack-bitmap: implement BLOB_NONE filtering Jeff King
2020-02-13  2:25 ` [PATCH 12/13] pack-bitmap: implement BLOB_LIMIT filtering Jeff King
2020-02-13 23:17   ` Junio C Hamano
2020-02-13  2:25 ` [PATCH 13/13] pack-objects: support filters with bitmaps Jeff King
2020-02-14 18:21 ` [PATCH v2 0/15] combining object filters and bitmaps Jeff King
2020-02-14 18:22   ` [PATCH v2 01/15] pack-bitmap: factor out type iterator initialization Jeff King
2020-02-15  0:10     ` Taylor Blau
2020-02-14 18:22   ` [PATCH v2 02/15] pack-bitmap: fix leak of haves/wants object lists Jeff King
2020-02-15  0:15     ` Taylor Blau
2020-02-15  6:46       ` Jeff King
2020-02-18 17:58     ` Derrick Stolee
2020-02-18 20:02       ` Jeff King
2020-02-14 18:22   ` [PATCH v2 03/15] rev-list: fallback to non-bitmap traversal when filtering Jeff King
2020-02-15  0:22     ` Taylor Blau
2020-02-14 18:22   ` [PATCH v2 04/15] pack-bitmap: refuse to do a bitmap traversal with pathspecs Jeff King
2020-02-14 19:03     ` Junio C Hamano
2020-02-14 20:51       ` Jeff King
2020-02-14 18:22   ` [PATCH v2 05/15] rev-list: factor out bitmap-optimized routines Jeff King
2020-02-15  0:35     ` Taylor Blau
2020-02-14 18:22   ` [PATCH v2 06/15] rev-list: make --count work with --objects Jeff King
2020-02-15  0:42     ` Taylor Blau
2020-02-15  6:48       ` Jeff King
2020-02-16 23:34         ` Junio C Hamano
2020-02-18  5:24           ` Jeff King
2020-02-18 17:28             ` Junio C Hamano
2020-02-18 19:55               ` Jeff King
2020-02-18 21:19                 ` Junio C Hamano
2020-02-18 21:23                   ` Jeff King
2020-02-18 18:05     ` Derrick Stolee
2020-02-18 19:59       ` Jeff King
2020-02-14 18:22   ` [PATCH v2 07/15] rev-list: allow bitmaps when counting objects Jeff King
2020-02-15  0:45     ` Taylor Blau
2020-02-15  6:55       ` Jeff King
2020-02-16 23:36         ` Junio C Hamano
2020-02-14 18:22   ` [PATCH v2 08/15] t5310: factor out bitmap traversal comparison Jeff King
2020-02-15  2:14     ` Taylor Blau
2020-02-15  7:00       ` Jeff King
2020-02-14 18:22   ` [PATCH v2 09/15] rev-list: allow commit-only bitmap traversals Jeff King
2020-02-18 18:18     ` Derrick Stolee
2020-02-18 20:05       ` Jeff King
2020-02-18 20:11         ` Derrick Stolee
2020-02-14 18:22   ` [PATCH v2 10/15] pack-bitmap: basic noop bitmap filter infrastructure Jeff King
2020-02-14 18:22   ` [PATCH v2 11/15] rev-list: use bitmap filters for traversal Jeff King
2020-02-14 18:22   ` [PATCH v2 12/15] bitmap: add bitmap_unset() function Jeff King
2020-02-14 18:22   ` [PATCH v2 13/15] pack-bitmap: implement BLOB_NONE filtering Jeff King
2020-02-18 19:26     ` Derrick Stolee
2020-02-18 19:36       ` Derrick Stolee
2020-02-18 20:30         ` Jeff King
2020-02-18 20:24       ` Jeff King
2020-02-14 18:22   ` Jeff King [this message]
2020-02-14 18:22   ` [PATCH v2 15/15] pack-objects: support filters with bitmaps Jeff King

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200214182239.GN150965@coredump.intra.peff.net \
    --to=peff@peff.net \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.