Git Mailing List Archive on lore.kernel.org
 help / color / Atom feed
From: Jeff King <peff@peff.net>
To: git@vger.kernel.org
Subject: [PATCH 12/13] pack-bitmap: implement BLOB_LIMIT filtering
Date: Wed, 12 Feb 2020 21:25:23 -0500
Message-ID: <20200213022523.GL1126038@coredump.intra.peff.net> (raw)
In-Reply-To: <20200213021506.GA1124607@coredump.intra.peff.net>

Just as the previous commit implemented BLOB_NONE, we can support
BLOB_LIMIT filters by looking at the sizes of any blobs in the result
and unsetting their bits as appropriate. This is slightly more expensive
than BLOB_NONE, but still produces a noticeable speedup (these results
are on git.git):

  Test                                        HEAD~2            HEAD
  ------------------------------------------------------------------------------------
  5310.7: rev-list count with blob:none       1.80(1.77+0.02)   0.22(0.20+0.02) -87.8%
  5310.8: rev-list count with blob:limit=1k   1.99(1.96+0.03)   0.29(0.25+0.03) -85.4%

The implementation is similar to the BLOB_NONE one, with the exception
that we have to go object-by-object while walking the blob-type bitmap
(since we can't mask out the matches, but must look up the size
individually for each blob). The trick with using ctz64() is taken from
show_objects_for_type(), which likewise needs to find individual bits
(but wants to quickly skip over big chunks without blobs).

Signed-off-by: Jeff King <peff@peff.net>
---
 pack-bitmap.c                      | 80 ++++++++++++++++++++++++++++++
 t/perf/p5310-pack-bitmaps.sh       |  5 ++
 t/t6113-rev-list-bitmap-filters.sh | 20 +++++++-
 3 files changed, 104 insertions(+), 1 deletion(-)

diff --git a/pack-bitmap.c b/pack-bitmap.c
index f430ddc3d2..76cb60e8c3 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -773,6 +773,78 @@ static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git,
 	bitmap_free(tips);
 }
 
+static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
+				     uint32_t pos)
+{
+	struct packed_git *pack = bitmap_git->pack;
+	unsigned long size;
+	struct object_info oi = OBJECT_INFO_INIT;
+
+	oi.sizep = &size;
+
+	if (pos < pack->num_objects) {
+		struct revindex_entry *entry = &pack->revindex[pos];
+		if (packed_object_info(the_repository, pack,
+				       entry->offset, &oi) < 0) {
+			struct object_id oid;
+			nth_packed_object_oid(&oid, pack, entry->nr);
+			die(_("unable to get size of %s"), oid_to_hex(&oid));
+		}
+	} else {
+		struct eindex *eindex = &bitmap_git->ext_index;
+		struct object *obj = eindex->objects[pos - pack->num_objects];
+		if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
+			die(_("unable to get size of %s"), oid_to_hex(&obj->oid));
+	}
+
+	return size;
+}
+
+static void filter_bitmap_blob_limit(struct bitmap_index *bitmap_git,
+				     struct object_list *tip_objects,
+				     struct bitmap *to_filter,
+				     unsigned long limit)
+{
+	struct eindex *eindex = &bitmap_git->ext_index;
+	struct bitmap *tips;
+	struct ewah_iterator it;
+	eword_t mask;
+	uint32_t i;
+
+	tips = find_tip_blobs(bitmap_git, tip_objects);
+
+	for (i = 0, init_type_iterator(&it, bitmap_git, OBJ_BLOB);
+	     i < to_filter->word_alloc && ewah_iterator_next(&mask, &it);
+	     i++) {
+		eword_t word = to_filter->words[i] & mask;
+		unsigned offset;
+
+		for (offset = 0; offset < BITS_IN_EWORD; offset++) {
+			uint32_t pos;
+
+			if ((word >> offset) == 0)
+				break;
+			offset += ewah_bit_ctz64(word >> offset);
+			pos = i * BITS_IN_EWORD + offset;
+
+			if (!bitmap_get(tips, pos) &&
+			    get_size_by_pos(bitmap_git, pos) >= limit)
+				bitmap_unset(to_filter, pos);
+		}
+	}
+
+	for (i = 0; i < eindex->count; i++) {
+		uint32_t pos = i + bitmap_git->pack->num_objects;
+		if (eindex->objects[i]->type == OBJ_BLOB &&
+		    bitmap_get(to_filter, pos) &&
+		    !bitmap_get(tips, pos) &&
+		    get_size_by_pos(bitmap_git, pos) >= limit)
+			bitmap_unset(to_filter, pos);
+	}
+
+	bitmap_free(tips);
+}
+
 static int filter_bitmap(struct bitmap_index *bitmap_git,
 			 struct object_list *tip_objects,
 			 struct bitmap *to_filter,
@@ -788,6 +860,14 @@ static int filter_bitmap(struct bitmap_index *bitmap_git,
 		return 0;
 	}
 
+	if (filter->choice == LOFC_BLOB_LIMIT) {
+		if (bitmap_git)
+			filter_bitmap_blob_limit(bitmap_git, tip_objects,
+						 to_filter,
+						 filter->blob_limit_value);
+		return 0;
+	}
+
 	/* filter choice not handled */
 	return -1;
 }
diff --git a/t/perf/p5310-pack-bitmaps.sh b/t/perf/p5310-pack-bitmaps.sh
index 3383983450..bbe1eb26a9 100755
--- a/t/perf/p5310-pack-bitmaps.sh
+++ b/t/perf/p5310-pack-bitmaps.sh
@@ -44,6 +44,11 @@ test_perf 'rev-list count with blob:none' '
 		--filter=blob:none >/dev/null
 '
 
+test_perf 'rev-list count with blob:limit=1k' '
+	git rev-list --use-bitmap-index --count --objects --all \
+		--filter=blob:limit=1k >/dev/null
+'
+
 test_expect_success 'create partial bitmap state' '
 	# pick a commit to represent the repo tip in the past
 	cutoff=$(git rev-list HEAD~100 -1) &&
diff --git a/t/t6113-rev-list-bitmap-filters.sh b/t/t6113-rev-list-bitmap-filters.sh
index feaa6c0989..0878f72828 100755
--- a/t/t6113-rev-list-bitmap-filters.sh
+++ b/t/t6113-rev-list-bitmap-filters.sh
@@ -6,8 +6,10 @@ test_description='rev-list combining bitmaps and filters'
 test_expect_success 'set up bitmapped repo' '
 	# one commit will have bitmaps, the other will not
 	test_commit one &&
+	test_commit much-larger-blob-one &&
 	git repack -adb &&
-	test_commit two
+	test_commit two &&
+	test_commit much-larger-blob-two
 '
 
 test_expect_success 'filters fallback to non-bitmap traversal' '
@@ -56,4 +58,20 @@ test_expect_success 'blob:none filter with specified blob' '
 	cmp_bitmap_traversal expect actual
 '
 
+test_expect_success 'blob:limit filter' '
+	git rev-list --objects --filter=blob:limit=5 HEAD >expect &&
+	git rev-list --use-bitmap-index \
+		     --objects --filter=blob:limit=5 HEAD >actual &&
+	cmp_bitmap_traversal expect actual
+'
+
+test_expect_success 'blob:limit filter with specified blob' '
+	git rev-list --objects --filter=blob:limit=5 \
+		     HEAD HEAD:much-larger-blob-two.t >expect &&
+	git rev-list --use-bitmap-index \
+		     --objects --filter=blob:limit=5 \
+		     HEAD HEAD:much-larger-blob-two.t >actual &&
+	cmp_bitmap_traversal expect actual
+'
+
 test_done
-- 
2.25.0.785.g49bcbe7794


  parent reply index

Thread overview: 73+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-13  2:15 [PATCH 0/13] combining object filters and bitmaps Jeff King
2020-02-13  2:16 ` [PATCH 01/13] pack-bitmap: factor out type iterator initialization Jeff King
2020-02-13 17:45   ` Junio C Hamano
2020-02-13  2:16 ` [PATCH 02/13] pack-bitmap: fix leak of haves/wants object lists Jeff King
2020-02-13 18:12   ` Junio C Hamano
2020-02-13  2:17 ` [PATCH 03/13] rev-list: fallback to non-bitmap traversal when filtering Jeff King
2020-02-13 18:19   ` Junio C Hamano
2020-02-13 18:40     ` Jeff King
2020-02-13  2:17 ` [PATCH 04/13] rev-list: consolidate bitmap-disabling options Jeff King
2020-02-13  2:18 ` [PATCH 05/13] rev-list: factor out bitmap-optimized routines Jeff King
2020-02-13 18:34   ` Junio C Hamano
2020-02-13  2:19 ` [PATCH 06/13] rev-list: make --count work with --objects Jeff King
2020-02-13 19:14   ` Junio C Hamano
2020-02-13 20:27     ` Jeff King
2020-02-13  2:20 ` [PATCH 07/13] rev-list: allow bitmaps when counting objects Jeff King
2020-02-13 21:47   ` Junio C Hamano
2020-02-13 22:27     ` Jeff King
2020-02-13  2:20 ` [PATCH 08/13] pack-bitmap: basic noop bitmap filter infrastructure Jeff King
2020-02-13  2:21 ` [PATCH 09/13] rev-list: use bitmap filters for traversal Jeff King
2020-02-13 22:22   ` Junio C Hamano
2020-02-13 22:34     ` Jeff King
2020-02-13  2:21 ` [PATCH 10/13] bitmap: add bitmap_unset() function Jeff King
2020-02-13  2:23 ` [PATCH 11/13] pack-bitmap: implement BLOB_NONE filtering Jeff King
2020-02-13  2:25 ` Jeff King [this message]
2020-02-13 23:17   ` [PATCH 12/13] pack-bitmap: implement BLOB_LIMIT filtering Junio C Hamano
2020-02-13  2:25 ` [PATCH 13/13] pack-objects: support filters with bitmaps Jeff King
2020-02-14 18:21 ` [PATCH v2 0/15] combining object filters and bitmaps Jeff King
2020-02-14 18:22   ` [PATCH v2 01/15] pack-bitmap: factor out type iterator initialization Jeff King
2020-02-15  0:10     ` Taylor Blau
2020-02-14 18:22   ` [PATCH v2 02/15] pack-bitmap: fix leak of haves/wants object lists Jeff King
2020-02-15  0:15     ` Taylor Blau
2020-02-15  6:46       ` Jeff King
2020-02-18 17:58     ` Derrick Stolee
2020-02-18 20:02       ` Jeff King
2020-02-14 18:22   ` [PATCH v2 03/15] rev-list: fallback to non-bitmap traversal when filtering Jeff King
2020-02-15  0:22     ` Taylor Blau
2020-02-14 18:22   ` [PATCH v2 04/15] pack-bitmap: refuse to do a bitmap traversal with pathspecs Jeff King
2020-02-14 19:03     ` Junio C Hamano
2020-02-14 20:51       ` Jeff King
2020-02-14 18:22   ` [PATCH v2 05/15] rev-list: factor out bitmap-optimized routines Jeff King
2020-02-15  0:35     ` Taylor Blau
2020-02-14 18:22   ` [PATCH v2 06/15] rev-list: make --count work with --objects Jeff King
2020-02-15  0:42     ` Taylor Blau
2020-02-15  6:48       ` Jeff King
2020-02-16 23:34         ` Junio C Hamano
2020-02-18  5:24           ` Jeff King
2020-02-18 17:28             ` Junio C Hamano
2020-02-18 19:55               ` Jeff King
2020-02-18 21:19                 ` Junio C Hamano
2020-02-18 21:23                   ` Jeff King
2020-02-18 18:05     ` Derrick Stolee
2020-02-18 19:59       ` Jeff King
2020-02-14 18:22   ` [PATCH v2 07/15] rev-list: allow bitmaps when counting objects Jeff King
2020-02-15  0:45     ` Taylor Blau
2020-02-15  6:55       ` Jeff King
2020-02-16 23:36         ` Junio C Hamano
2020-02-14 18:22   ` [PATCH v2 08/15] t5310: factor out bitmap traversal comparison Jeff King
2020-02-15  2:14     ` Taylor Blau
2020-02-15  7:00       ` Jeff King
2020-02-14 18:22   ` [PATCH v2 09/15] rev-list: allow commit-only bitmap traversals Jeff King
2020-02-18 18:18     ` Derrick Stolee
2020-02-18 20:05       ` Jeff King
2020-02-18 20:11         ` Derrick Stolee
2020-02-14 18:22   ` [PATCH v2 10/15] pack-bitmap: basic noop bitmap filter infrastructure Jeff King
2020-02-14 18:22   ` [PATCH v2 11/15] rev-list: use bitmap filters for traversal Jeff King
2020-02-14 18:22   ` [PATCH v2 12/15] bitmap: add bitmap_unset() function Jeff King
2020-02-14 18:22   ` [PATCH v2 13/15] pack-bitmap: implement BLOB_NONE filtering Jeff King
2020-02-18 19:26     ` Derrick Stolee
2020-02-18 19:36       ` Derrick Stolee
2020-02-18 20:30         ` Jeff King
2020-02-18 20:24       ` Jeff King
2020-02-14 18:22   ` [PATCH v2 14/15] pack-bitmap: implement BLOB_LIMIT filtering Jeff King
2020-02-14 18:22   ` [PATCH v2 15/15] pack-objects: support filters with bitmaps Jeff King

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200213022523.GL1126038@coredump.intra.peff.net \
    --to=peff@peff.net \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Mailing List Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/git/0 git/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 git git/ https://lore.kernel.org/git \
		git@vger.kernel.org
	public-inbox-index git

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.git


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git