Git Mailing List Archive on lore.kernel.org
 help / color / Atom feed
From: Patrick Steinhardt <ps@pks.im>
To: git@vger.kernel.org
Cc: Jeff King <peff@peff.net>,
	Christian Couder <christian.couder@gmail.com>,
	Taylor Blau <me@ttaylorr.com>
Subject: [PATCH v3 5/8] list-objects: implement object type filter
Date: Fri, 9 Apr 2021 13:28:11 +0200
Message-ID: <e2a14abf924667c51e6a5680b6a5da65e561497d.1617967252.git.ps@pks.im> (raw)
In-Reply-To: <cover.1617967252.git.ps@pks.im>


[-- Attachment #1: Type: text/plain, Size: 9502 bytes --]

While it already is possible to filter objects by some criteria in
git-rev-list(1), it is not yet possible to restrict the output to
objects of a specific type. This makes some filters less useful. The
`blob:limit` filter, for example, filters blobs such that only those
which are smaller than the given limit are returned. But it cannot be
used to ask for only these smallish blobs, given that git-rev-list(1)
will continue to print tags, commits and trees.

Now that we have the infrastructure in place to also filter tags and
commits, we can improve this situation by implementing a new filter
which selects objects based on their type. The above query can thus
trivially be implemented with the following command:

    $ git rev-list --objects --filter=object:type=blob \
        --filter=blob:limit=200

Furthermore, this filter allows us to optimize for certain other cases:
if, for example, only tags or commits have been selected, there is no
need to walk down trees.

The new filter is not yet supported in bitmaps. This is going to be
implemented in a subsequent commit.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 Documentation/config/uploadpack.txt |  6 +--
 Documentation/rev-list-options.txt  |  3 ++
 list-objects-filter-options.c       | 14 ++++++
 list-objects-filter-options.h       |  2 +
 list-objects-filter.c               | 76 +++++++++++++++++++++++++++++
 t/t6112-rev-list-filters-objects.sh | 48 ++++++++++++++++++
 6 files changed, 146 insertions(+), 3 deletions(-)

diff --git a/Documentation/config/uploadpack.txt b/Documentation/config/uploadpack.txt
index 6729a072ea..32fad5bbe8 100644
--- a/Documentation/config/uploadpack.txt
+++ b/Documentation/config/uploadpack.txt
@@ -66,9 +66,9 @@ uploadpackfilter.allow::
 uploadpackfilter.<filter>.allow::
 	Explicitly allow or ban the object filter corresponding to
 	`<filter>`, where `<filter>` may be one of: `blob:none`,
-	`blob:limit`, `tree`, `sparse:oid`, or `combine`. If using
-	combined filters, both `combine` and all of the nested filter
-	kinds must be allowed. Defaults to `uploadpackfilter.allow`.
+	`blob:limit`, `object:type`, `tree`, `sparse:oid`, or `combine`.
+	If using combined filters, both `combine` and all of the nested
+	filter kinds must be allowed. Defaults to `uploadpackfilter.allow`.
 
 uploadpackfilter.tree.maxDepth::
 	Only allow `--filter=tree:<n>` when `<n>` is no more than the value of
diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt
index b1c8f86c6e..3afa8fffbd 100644
--- a/Documentation/rev-list-options.txt
+++ b/Documentation/rev-list-options.txt
@@ -892,6 +892,9 @@ or units.  n may be zero.  The suffixes k, m, and g can be used to name
 units in KiB, MiB, or GiB.  For example, 'blob:limit=1k' is the same
 as 'blob:limit=1024'.
 +
+The form '--filter=object:type=(tag|commit|tree|blob)' omits all objects
+which are not of the requested type.
++
 The form '--filter=sparse:oid=<blob-ish>' uses a sparse-checkout
 specification contained in the blob (or blob-expression) '<blob-ish>'
 to omit blobs that would not be not required for a sparse checkout on
diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index d2d1c81caf..bb6f6577d5 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -29,6 +29,8 @@ const char *list_object_filter_config_name(enum list_objects_filter_choice c)
 		return "tree";
 	case LOFC_SPARSE_OID:
 		return "sparse:oid";
+	case LOFC_OBJECT_TYPE:
+		return "object:type";
 	case LOFC_COMBINE:
 		return "combine";
 	case LOFC__COUNT:
@@ -97,6 +99,18 @@ static int gently_parse_list_objects_filter(
 		}
 		return 1;
 
+	} else if (skip_prefix(arg, "object:type=", &v0)) {
+		int type = type_from_string_gently(v0, -1, 1);
+		if (type < 0) {
+			strbuf_addstr(errbuf, _("expected 'object:type=<type>'"));
+			return 1;
+		}
+
+		filter_options->object_type = type;
+		filter_options->choice = LOFC_OBJECT_TYPE;
+
+		return 0;
+
 	} else if (skip_prefix(arg, "combine:", &v0)) {
 		return parse_combine_filter(filter_options, v0, errbuf);
 
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index 01767c3c96..4d0d0588cc 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -13,6 +13,7 @@ enum list_objects_filter_choice {
 	LOFC_BLOB_LIMIT,
 	LOFC_TREE_DEPTH,
 	LOFC_SPARSE_OID,
+	LOFC_OBJECT_TYPE,
 	LOFC_COMBINE,
 	LOFC__COUNT /* must be last */
 };
@@ -54,6 +55,7 @@ struct list_objects_filter_options {
 	char *sparse_oid_name;
 	unsigned long blob_limit_value;
 	unsigned long tree_exclude_depth;
+	enum object_type object_type;
 
 	/* LOFC_COMBINE values */
 
diff --git a/list-objects-filter.c b/list-objects-filter.c
index 0ebfa52966..1c1ee3d1bb 100644
--- a/list-objects-filter.c
+++ b/list-objects-filter.c
@@ -545,6 +545,81 @@ static void filter_sparse_oid__init(
 	filter->free_fn = filter_sparse_free;
 }
 
+/*
+ * A filter for list-objects to omit all objects which are not of the
+ * requested type.
+ */
+struct filter_object_type_data {
+	enum object_type object_type;
+};
+
+static enum list_objects_filter_result filter_object_type(
+	struct repository *r,
+	enum list_objects_filter_situation filter_situation,
+	struct object *obj,
+	const char *pathname,
+	const char *filename,
+	struct oidset *omits,
+	void *filter_data_)
+{
+	struct filter_object_type_data *filter_data = filter_data_;
+
+	switch (filter_situation) {
+	default:
+		BUG("unknown filter_situation: %d", filter_situation);
+
+	case LOFS_TAG:
+		assert(obj->type == OBJ_TAG);
+		if (filter_data->object_type == OBJ_TAG)
+			return LOFR_MARK_SEEN | LOFR_DO_SHOW;
+		return LOFR_MARK_SEEN;
+
+	case LOFS_COMMIT:
+		assert(obj->type == OBJ_COMMIT);
+		if (filter_data->object_type == OBJ_COMMIT)
+			return LOFR_MARK_SEEN | LOFR_DO_SHOW;
+		return LOFR_MARK_SEEN;
+
+	case LOFS_BEGIN_TREE:
+		assert(obj->type == OBJ_TREE);
+
+		/*
+		 * If we only want to show commits or tags, then there is no
+		 * need to walk down trees.
+		 */
+		if (filter_data->object_type == OBJ_COMMIT ||
+		    filter_data->object_type == OBJ_TAG)
+			return LOFR_SKIP_TREE;
+
+		if (filter_data->object_type == OBJ_TREE)
+			return LOFR_MARK_SEEN | LOFR_DO_SHOW;
+
+		return LOFR_MARK_SEEN;
+
+	case LOFS_BLOB:
+		assert(obj->type == OBJ_BLOB);
+
+		if (filter_data->object_type == OBJ_BLOB)
+			return LOFR_MARK_SEEN | LOFR_DO_SHOW;
+		return LOFR_MARK_SEEN;
+
+	case LOFS_END_TREE:
+		return LOFR_ZERO;
+	}
+}
+
+static void filter_object_type__init(
+	struct list_objects_filter_options *filter_options,
+	struct filter *filter)
+{
+	struct filter_object_type_data *d = xcalloc(1, sizeof(*d));
+	d->object_type = filter_options->object_type;
+
+	filter->filter_data = d;
+	filter->filter_object_fn = filter_object_type;
+	filter->free_fn = free;
+}
+
 /* A filter which only shows objects shown by all sub-filters. */
 struct combine_filter_data {
 	struct subfilter *sub;
@@ -691,6 +766,7 @@ static filter_init_fn s_filters[] = {
 	filter_blobs_limit__init,
 	filter_trees_depth__init,
 	filter_sparse_oid__init,
+	filter_object_type__init,
 	filter_combine__init,
 };
 
diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh
index 31457d13b9..c79ec04060 100755
--- a/t/t6112-rev-list-filters-objects.sh
+++ b/t/t6112-rev-list-filters-objects.sh
@@ -159,6 +159,54 @@ test_expect_success 'verify blob:limit=1m' '
 	test_must_be_empty observed
 '
 
+# Test object:type=<type> filter.
+
+test_expect_success 'setup object-type' '
+	git init object-type &&
+	echo contents >object-type/blob &&
+	git -C object-type add blob &&
+	git -C object-type commit -m commit-message &&
+	git -C object-type tag tag -m tag-message
+'
+
+test_expect_success 'verify object:type= fails with invalid type' '
+	test_must_fail git -C object-type rev-list --objects --filter=object:type= HEAD &&
+	test_must_fail git -C object-type rev-list --objects --filter=object:type=invalid HEAD
+'
+
+test_expect_success 'verify object:type=blob prints blob and commit' '
+	(
+		git -C object-type rev-parse HEAD &&
+		printf "%s blob\n" $(git -C object-type rev-parse HEAD:blob)
+	) >expected &&
+	git -C object-type rev-list --objects --filter=object:type=blob HEAD >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'verify object:type=tree prints tree and commit' '
+	(
+		git -C object-type rev-parse HEAD &&
+		printf "%s \n" $(git -C object-type rev-parse HEAD^{tree})
+	) >expected &&
+	git -C object-type rev-list --objects --filter=object:type=tree HEAD >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'verify object:type=commit prints commit' '
+	git -C object-type rev-parse HEAD >expected &&
+	git -C object-type rev-list --objects --filter=object:type=commit HEAD >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'verify object:type=tag prints tag' '
+	(
+		git -C object-type rev-parse HEAD &&
+		printf "%s tag\n" $(git -C object-type rev-parse tag)
+	) >expected &&
+	git -C object-type rev-list --objects --filter=object:type=tag tag >actual &&
+	test_cmp expected actual
+'
+
 # Test sparse:path=<path> filter.
 # !!!!
 # NOTE: sparse:path filter support has been dropped for security reasons,
-- 
2.31.1


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

  parent reply index

Thread overview: 97+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-01 12:20 [PATCH 0/7] rev-parse: " Patrick Steinhardt
2021-03-01 12:20 ` [PATCH 1/7] revision: mark commit parents as NOT_USER_GIVEN Patrick Steinhardt
2021-03-01 12:20 ` [PATCH 2/7] list-objects: move tag processing into its own function Patrick Steinhardt
2021-03-01 12:20 ` [PATCH 3/7] list-objects: support filtering by tag and commit Patrick Steinhardt
2021-03-01 12:20 ` [PATCH 4/7] list-objects: implement object type filter Patrick Steinhardt
2021-03-01 12:20 ` [PATCH 5/7] pack-bitmap: " Patrick Steinhardt
2021-03-01 12:20 ` [PATCH 6/7] pack-bitmap: implement combined filter Patrick Steinhardt
2021-03-01 12:21 ` [PATCH 7/7] rev-list: allow filtering of provided items Patrick Steinhardt
2021-03-10 21:39 ` [PATCH 0/7] rev-parse: implement object type filter Jeff King
2021-03-11 14:38   ` Patrick Steinhardt
2021-03-11 17:54     ` Jeff King
2021-03-15 11:25   ` Patrick Steinhardt
2021-03-10 21:58 ` Taylor Blau
2021-03-10 22:19   ` Jeff King
2021-03-11 14:43     ` Patrick Steinhardt
2021-03-11 17:56       ` Jeff King
2021-03-15 13:14 ` [PATCH v2 0/8] " Patrick Steinhardt
2021-03-15 13:14   ` [PATCH v2 1/8] uploadpack.txt: document implication of `uploadpackfilter.allow` Patrick Steinhardt
2021-04-06 17:17     ` Jeff King
2021-03-15 13:14   ` [PATCH v2 2/8] revision: mark commit parents as NOT_USER_GIVEN Patrick Steinhardt
2021-04-06 17:30     ` Jeff King
2021-04-09 10:19       ` Patrick Steinhardt
2021-03-15 13:14   ` [PATCH v2 3/8] list-objects: move tag processing into its own function Patrick Steinhardt
2021-04-06 17:39     ` Jeff King
2021-03-15 13:14   ` [PATCH v2 4/8] list-objects: support filtering by tag and commit Patrick Steinhardt
2021-03-15 13:14   ` [PATCH v2 5/8] list-objects: implement object type filter Patrick Steinhardt
2021-04-06 17:42     ` Jeff King
2021-03-15 13:14   ` [PATCH v2 6/8] pack-bitmap: " Patrick Steinhardt
2021-04-06 17:48     ` Jeff King
2021-03-15 13:14   ` [PATCH v2 7/8] pack-bitmap: implement combined filter Patrick Steinhardt
2021-04-06 17:54     ` Jeff King
2021-04-09 10:31       ` Patrick Steinhardt
2021-04-09 15:53         ` Jeff King
2021-04-09 11:17       ` Patrick Steinhardt
2021-04-09 15:55         ` Jeff King
2021-03-15 13:15   ` [PATCH v2 8/8] rev-list: allow filtering of provided items Patrick Steinhardt
2021-04-06 18:04     ` Jeff King
2021-04-09 10:59       ` Patrick Steinhardt
2021-04-09 15:58         ` Jeff King
2021-03-20 21:10   ` [PATCH v2 0/8] rev-parse: implement object type filter Junio C Hamano
2021-04-06 18:08     ` Jeff King
2021-04-09 11:14       ` Patrick Steinhardt
2021-04-09 16:05         ` Jeff King
2021-04-09 11:27   ` [PATCH v3 " Patrick Steinhardt
2021-04-09 11:27     ` [PATCH v3 1/8] uploadpack.txt: document implication of `uploadpackfilter.allow` Patrick Steinhardt
2021-04-09 11:27     ` [PATCH v3 2/8] revision: mark commit parents as NOT_USER_GIVEN Patrick Steinhardt
2021-04-09 11:28     ` [PATCH v3 3/8] list-objects: move tag processing into its own function Patrick Steinhardt
2021-04-09 11:28     ` [PATCH v3 4/8] list-objects: support filtering by tag and commit Patrick Steinhardt
2021-04-11  6:49       ` Junio C Hamano
2021-04-09 11:28     ` Patrick Steinhardt [this message]
2021-04-09 11:28     ` [PATCH v3 6/8] pack-bitmap: implement object type filter Patrick Steinhardt
2021-04-09 11:28     ` [PATCH v3 7/8] pack-bitmap: implement combined filter Patrick Steinhardt
2021-04-09 11:28     ` [PATCH v3 8/8] rev-list: allow filtering of provided items Patrick Steinhardt
2021-04-09 11:32       ` [RESEND PATCH " Patrick Steinhardt
2021-04-09 15:00       ` [PATCH " Philip Oakley
2021-04-12 13:15         ` Patrick Steinhardt
2021-04-11  6:02     ` [PATCH v3 0/8] rev-parse: implement object type filter Junio C Hamano
2021-04-12 13:12       ` Patrick Steinhardt
2021-04-12 13:37     ` [PATCH v4 0/8] rev-list: " Patrick Steinhardt
2021-04-12 13:37       ` [PATCH v4 1/8] uploadpack.txt: document implication of `uploadpackfilter.allow` Patrick Steinhardt
2021-04-12 13:37       ` [PATCH v4 2/8] revision: mark commit parents as NOT_USER_GIVEN Patrick Steinhardt
2021-04-12 13:37       ` [PATCH v4 3/8] list-objects: move tag processing into its own function Patrick Steinhardt
2021-04-12 13:37       ` [PATCH v4 4/8] list-objects: support filtering by tag and commit Patrick Steinhardt
2021-04-12 13:37       ` [PATCH v4 5/8] list-objects: implement object type filter Patrick Steinhardt
2021-04-13  9:57         ` Ævar Arnfjörð Bjarmason
2021-04-13 10:43           ` Andreas Schwab
2021-04-14 11:32           ` Patrick Steinhardt
2021-04-12 13:37       ` [PATCH v4 6/8] pack-bitmap: " Patrick Steinhardt
2021-04-12 13:37       ` [PATCH v4 7/8] pack-bitmap: implement combined filter Patrick Steinhardt
2021-04-12 13:37       ` [PATCH v4 8/8] rev-list: allow filtering of provided items Patrick Steinhardt
2021-04-13  7:45       ` [PATCH v4 0/8] rev-list: implement object type filter Jeff King
2021-04-13  8:06         ` Patrick Steinhardt
2021-04-15  9:42           ` Jeff King
2021-04-16 22:06             ` Junio C Hamano
2021-04-16 23:15               ` Junio C Hamano
2021-04-17  1:17                 ` Ramsay Jones
2021-04-17  9:01                   ` Jeff King
2021-04-17 21:45                     ` Junio C Hamano
2021-04-13 21:03         ` Junio C Hamano
2021-04-14 11:59           ` Patrick Steinhardt
2021-04-14 21:07             ` Junio C Hamano
2021-04-15  9:57               ` Jeff King
2021-04-15 17:53                 ` Junio C Hamano
2021-04-15 17:57                   ` Junio C Hamano
2021-04-17  8:58                     ` Jeff King
2021-04-19 11:46       ` [PATCH v5 " Patrick Steinhardt
2021-04-19 11:46         ` [PATCH v5 1/8] uploadpack.txt: document implication of `uploadpackfilter.allow` Patrick Steinhardt
2021-04-19 11:46         ` [PATCH v5 2/8] revision: mark commit parents as NOT_USER_GIVEN Patrick Steinhardt
2021-04-19 11:46         ` [PATCH v5 3/8] list-objects: move tag processing into its own function Patrick Steinhardt
2021-04-19 11:46         ` [PATCH v5 4/8] list-objects: support filtering by tag and commit Patrick Steinhardt
2021-04-19 11:46         ` [PATCH v5 5/8] list-objects: implement object type filter Patrick Steinhardt
2021-04-19 11:46         ` [PATCH v5 6/8] pack-bitmap: " Patrick Steinhardt
2021-04-19 11:47         ` [PATCH v5 7/8] pack-bitmap: implement combined filter Patrick Steinhardt
2021-04-19 11:47         ` [PATCH v5 8/8] rev-list: allow filtering of provided items Patrick Steinhardt
2021-04-19 23:16         ` [PATCH v5 0/8] rev-list: implement object type filter Junio C Hamano
2021-04-23  9:13           ` Jeff King
2021-04-28  2:18             ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e2a14abf924667c51e6a5680b6a5da65e561497d.1617967252.git.ps@pks.im \
    --to=ps@pks.im \
    --cc=christian.couder@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=me@ttaylorr.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Mailing List Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/git/0 git/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 git git/ https://lore.kernel.org/git \
		git@vger.kernel.org
	public-inbox-index git

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.git


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git