git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Phillip Wood <phillip.wood123@gmail.com>
To: "Michał Kępień" <michal@isc.org>, git@vger.kernel.org
Subject: Re: [PATCH v3 2/2] diff: add -I<regex> that ignores matching changes
Date: Fri, 16 Oct 2020 16:32:47 +0100	[thread overview]
Message-ID: <afd3b1cf-b883-6df5-bea5-28f8e06d8702@gmail.com> (raw)
In-Reply-To: <20201015072406.4506-3-michal@isc.org>

Hi Michał

Thanks for working on this; it will be a useful addition. Unfortunately, 
there's a use-after-free error -- see below.

On 15/10/2020 08:24, Michał Kępień wrote:
> Add a new diff option that enables ignoring changes whose all lines
> (changed, removed, and added) match a given regular expression.  This is
> similar to the -I/--ignore-matching-lines option in standalone diff
> utilities and can be used e.g. to ignore changes which only affect code
> comments or to look for unrelated changes in commits containing a large
> number of automatically applied modifications (e.g. a tree-wide string
> replacement).  The difference between -G/-S and the new -I option is
> that the latter filters output on a per-change basis.
> 
> Use the 'ignore' field of xdchange_t for marking a change as ignored or
> not.  Since the same field is used by --ignore-blank-lines, identical
> hunk emitting rules apply for --ignore-blank-lines and -I.  These two
> options can also be used together in the same git invocation (they are
> complementary to each other).
> 
> Rename xdl_mark_ignorable() to xdl_mark_ignorable_lines(), to indicate
> that it is logically a "sibling" of xdl_mark_ignorable_regex() rather
> than its "parent".
> 
> Signed-off-by: Michał Kępień <michal@isc.org>
> ---
>   Documentation/diff-options.txt |  5 ++++
>   diff.c                         | 28 ++++++++++++++++++++
>   diff.h                         |  4 +++
>   t/t4013-diff-various.sh        | 33 ++++++++++++++++++++++++
>   xdiff/xdiff.h                  |  4 +++
>   xdiff/xdiffi.c                 | 47 ++++++++++++++++++++++++++++++++--
>   6 files changed, 119 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
> index 573fb9bb71..ee52b65e46 100644
> --- a/Documentation/diff-options.txt
> +++ b/Documentation/diff-options.txt
> @@ -687,6 +687,11 @@ endif::git-format-patch[]
>   --ignore-blank-lines::
>   	Ignore changes whose lines are all blank.
>   
> +-I<regex>::
> +--ignore-matching-lines=<regex>::
> +	Ignore changes whose all lines match <regex>.  This option may
> +	be specified more than once.
> +
>   --inter-hunk-context=<lines>::
>   	Show the context between diff hunks, up to the specified number
>   	of lines, thereby fusing hunks that are close to each other.
> diff --git a/diff.c b/diff.c
> index 2bb2f8f57e..677de23352 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -3587,6 +3587,8 @@ static void builtin_diff(const char *name_a,
>   		if (header.len && !o->flags.suppress_diff_headers)
>   			ecbdata.header = &header;
>   		xpp.flags = o->xdl_opts;
> +		xpp.ignore_regex = o->ignore_regex;
> +		xpp.ignore_regex_nr = o->ignore_regex_nr;
>   		xpp.anchors = o->anchors;
>   		xpp.anchors_nr = o->anchors_nr;
>   		xecfg.ctxlen = o->context;
> @@ -3716,6 +3718,8 @@ static void builtin_diffstat(const char *name_a, const char *name_b,
>   		memset(&xpp, 0, sizeof(xpp));
>   		memset(&xecfg, 0, sizeof(xecfg));
>   		xpp.flags = o->xdl_opts;
> +		xpp.ignore_regex = o->ignore_regex;
> +		xpp.ignore_regex_nr = o->ignore_regex_nr;
>   		xpp.anchors = o->anchors;
>   		xpp.anchors_nr = o->anchors_nr;
>   		xecfg.ctxlen = o->context;
> @@ -5203,6 +5207,22 @@ static int diff_opt_patience(const struct option *opt,
>   	return 0;
>   }
>   
> +static int diff_opt_ignore_regex(const struct option *opt,
> +				 const char *arg, int unset)
> +{
> +	struct diff_options *options = opt->value;
> +	regex_t *regex;
> +
> +	BUG_ON_OPT_NEG(unset);
> +	regex = xmalloc(sizeof(*regex));
> +	if (regcomp(regex, arg, REG_EXTENDED | REG_NEWLINE))
> +		return error(_("invalid regex given to -I: '%s'"), arg);
> +	ALLOC_GROW(options->ignore_regex, options->ignore_regex_nr + 1,
> +		   options->ignore_regex_alloc);
> +	options->ignore_regex[options->ignore_regex_nr++] = regex;
> +	return 0;
> +}
> +
>   static int diff_opt_pickaxe_regex(const struct option *opt,
>   				  const char *arg, int unset)
>   {
> @@ -5491,6 +5511,9 @@ static void prep_parse_options(struct diff_options *options)
>   		OPT_BIT_F(0, "ignore-blank-lines", &options->xdl_opts,
>   			  N_("ignore changes whose lines are all blank"),
>   			  XDF_IGNORE_BLANK_LINES, PARSE_OPT_NONEG),
> +		OPT_CALLBACK_F('I', "ignore-matching-lines", options, N_("<regex>"),
> +			       N_("ignore changes whose all lines match <regex>"),
> +			       0, diff_opt_ignore_regex),
>   		OPT_BIT(0, "indent-heuristic", &options->xdl_opts,
>   			N_("heuristic to shift diff hunk boundaries for easy reading"),
>   			XDF_INDENT_HEURISTIC),
> @@ -6405,6 +6428,11 @@ void diff_flush(struct diff_options *options)
>   	DIFF_QUEUE_CLEAR(q);
>   	if (options->close_file)
>   		fclose(options->file);
> +	for (i = 0; i < options->ignore_regex_nr; i++) {
> +		regfree(options->ignore_regex[i]);
> +		free(options->ignore_regex[i]);
> +	}
> +	free(options->ignore_regex);

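If I run `git log -p -I foo`, the address sanitizer reports

AddressSanitizer: heap-use-after-free xdiff/xdiffi.c:1027 in 
record_matches_regex

after it has printed the diff for the first commit. I think freeing the 
regex here is the cause of the problem: as far as I can tell, diff_flush() 
is called again for each commit that `git log` shows, so the compiled 
regexes are freed after the first diff while options->ignore_regex and 
options->ignore_regex_nr are left pointing at them for the next one.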

Best Wishes

Phillip

>   	/*
>   	 * Report the content-level differences with HAS_CHANGES;
> diff --git a/diff.h b/diff.h
> index 11de52e9e9..a402227b80 100644
> --- a/diff.h
> +++ b/diff.h
> @@ -234,6 +234,10 @@ struct diff_options {
>   	 */
>   	const char *pickaxe;
>   
> +	/* -I<regex> */
> +	regex_t **ignore_regex;
> +	size_t ignore_regex_nr, ignore_regex_alloc;
> +
>   	const char *single_follow;
>   	const char *a_prefix, *b_prefix;
>   	const char *line_prefix;
> diff --git a/t/t4013-diff-various.sh b/t/t4013-diff-various.sh
> index 5c7b0122b4..efaaee2ef0 100755
> --- a/t/t4013-diff-various.sh
> +++ b/t/t4013-diff-various.sh
> @@ -6,6 +6,7 @@
>   test_description='Various diff formatting options'
>   
>   . ./test-lib.sh
> +. "$TEST_DIRECTORY"/diff-lib.sh
>   
>   test_expect_success setup '
>   
> @@ -473,4 +474,36 @@ test_expect_success 'diff-tree --stdin with log formatting' '
>   	test_cmp expect actual
>   '
>   
> +test_expect_success 'diff -I<regex>' '
> +	test_seq 50 >I.txt &&
> +	sed -e "s/13/ten and three/" -e "/7\$/d" <I.txt >J.txt &&
> +	echo >>J.txt &&
> +
> +	test_expect_code 1 git diff --no-index --ignore-blank-lines -I"ten.*e" -I"^[124-9]" I.txt J.txt >actual &&
> +	cat >expect <<-\EOF &&
> +	diff --git a/I.txt b/J.txt
> +	--- a/I.txt
> +	+++ b/J.txt
> +	@@ -34,7 +31,6 @@
> +	 34
> +	 35
> +	 36
> +	-37
> +	 38
> +	 39
> +	 40
> +	EOF
> +	compare_diff_patch expect actual &&
> +
> +	test_expect_code 1 git diff --stat --no-index --ignore-blank-lines -I"ten.*e" -I"^[124-9]" I.txt J.txt >actual &&
> +	cat >expect <<-\EOF &&
> +	 I.txt => J.txt | 1 -
> +	 1 file changed, 1 deletion(-)
> +	EOF
> +	test_cmp expect actual &&
> +
> +	test_expect_code 129 git diff --no-index --ignore-matching-lines="^[124-9]" --ignore-matching-lines="^[124-9" I.txt J.txt >output 2>&1 &&
> +	test_i18ngrep "invalid regex given to -I: " output
> +'
> +
>   test_done
> diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
> index 032e3a9f41..7a04605146 100644
> --- a/xdiff/xdiff.h
> +++ b/xdiff/xdiff.h
> @@ -79,6 +79,10 @@ typedef struct s_mmbuffer {
>   typedef struct s_xpparam {
>   	unsigned long flags;
>   
> +	/* -I<regex> */
> +	regex_t **ignore_regex;
> +	size_t ignore_regex_nr;
> +
>   	/* See Documentation/diff-options.txt. */
>   	char **anchors;
>   	size_t anchors_nr;
> diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
> index bd035139f9..380eb728ed 100644
> --- a/xdiff/xdiffi.c
> +++ b/xdiff/xdiffi.c
> @@ -998,7 +998,7 @@ static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
>   	return 0;
>   }
>   
> -static void xdl_mark_ignorable(xdchange_t *xscr, xdfenv_t *xe, long flags)
> +static void xdl_mark_ignorable_lines(xdchange_t *xscr, xdfenv_t *xe, long flags)
>   {
>   	xdchange_t *xch;
>   
> @@ -1019,6 +1019,46 @@ static void xdl_mark_ignorable(xdchange_t *xscr, xdfenv_t *xe, long flags)
>   	}
>   }
>   
> +static int record_matches_regex(xrecord_t *rec, xpparam_t const *xpp) {
> +	regmatch_t regmatch;
> +	int i;
> +
> +	for (i = 0; i < xpp->ignore_regex_nr; i++)
> +		if (!regexec_buf(xpp->ignore_regex[i], rec->ptr, rec->size, 1,
> +				 &regmatch, 0))
> +			return 1;
> +
> +	return 0;
> +}
> +
> +static void xdl_mark_ignorable_regex(xdchange_t *xscr, const xdfenv_t *xe,
> +				     xpparam_t const *xpp)
> +{
> +	xdchange_t *xch;
> +
> +	for (xch = xscr; xch; xch = xch->next) {
> +		xrecord_t **rec;
> +		int ignore = 1;
> +		long i;
> +
> +		/*
> +		 * Do not override --ignore-blank-lines.
> +		 */
> +		if (xch->ignore)
> +			continue;
> +
> +		rec = &xe->xdf1.recs[xch->i1];
> +		for (i = 0; i < xch->chg1 && ignore; i++)
> +			ignore = record_matches_regex(rec[i], xpp);
> +
> +		rec = &xe->xdf2.recs[xch->i2];
> +		for (i = 0; i < xch->chg2 && ignore; i++)
> +			ignore = record_matches_regex(rec[i], xpp);
> +
> +		xch->ignore = ignore;
> +	}
> +}
> +
>   int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
>   	     xdemitconf_t const *xecfg, xdemitcb_t *ecb) {
>   	xdchange_t *xscr;
> @@ -1038,7 +1078,10 @@ int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
>   	}
>   	if (xscr) {
>   		if (xpp->flags & XDF_IGNORE_BLANK_LINES)
> -			xdl_mark_ignorable(xscr, &xe, xpp->flags);
> +			xdl_mark_ignorable_lines(xscr, &xe, xpp->flags);
> +
> +		if (xpp->ignore_regex)
> +			xdl_mark_ignorable_regex(xscr, &xe, xpp);
>   
>   		if (ef(&xe, xscr, ecb, xecfg) < 0) {
>   
> 

  reply	other threads:[~2020-10-16 15:33 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-01 12:06 [PATCH 0/2] " Michał Kępień
2020-10-01 12:06 ` [PATCH 1/2] " Michał Kępień
2020-10-01 18:21   ` Junio C Hamano
2020-10-07 19:48     ` Michał Kępień
2020-10-07 20:08       ` Junio C Hamano
2020-10-01 12:06 ` [PATCH 2/2] t: add -I<regex> tests Michał Kępień
2020-10-01 17:02 ` [PATCH 0/2] diff: add -I<regex> that ignores matching changes Junio C Hamano
2020-10-12  9:17 ` [PATCH v2 0/3] " Michał Kępień
2020-10-12  9:17   ` [PATCH v2 1/3] merge-base, xdiff: zero out xpparam_t structures Michał Kępień
2020-10-12 11:14     ` Johannes Schindelin
2020-10-12 17:09       ` Junio C Hamano
2020-10-12 19:52     ` Junio C Hamano
2020-10-13  6:35       ` Michał Kępień
2020-10-12  9:17   ` [PATCH v2 2/3] diff: add -I<regex> that ignores matching changes Michał Kępień
2020-10-12 11:20     ` Johannes Schindelin
2020-10-12 20:00       ` Junio C Hamano
2020-10-12 20:39         ` Johannes Schindelin
2020-10-12 21:43           ` Junio C Hamano
2020-10-13  6:37             ` Michał Kępień
2020-10-13 15:49               ` Junio C Hamano
2020-10-13  6:36       ` Michał Kępień
2020-10-13 12:02         ` Johannes Schindelin
2020-10-13 15:53           ` Junio C Hamano
2020-10-13 18:45           ` Michał Kępień
2020-10-12 18:01     ` Junio C Hamano
2020-10-13  6:38       ` Michał Kępień
2020-10-12 20:04     ` Junio C Hamano
2020-10-13  6:38       ` Michał Kępień
2020-10-12  9:17   ` [PATCH v2 3/3] t: add -I<regex> tests Michał Kępień
2020-10-12 11:49     ` Johannes Schindelin
2020-10-13  6:38       ` Michał Kępień
2020-10-13 12:00         ` Johannes Schindelin
2020-10-13 16:00           ` Junio C Hamano
2020-10-13 19:01           ` Michał Kępień
2020-10-15 11:45             ` Johannes Schindelin
2020-10-15  7:24   ` [PATCH v3 0/2] diff: add -I<regex> that ignores matching changes Michał Kępień
2020-10-15  7:24     ` [PATCH v3 1/2] merge-base, xdiff: zero out xpparam_t structures Michał Kępień
2020-10-15  7:24     ` [PATCH v3 2/2] diff: add -I<regex> that ignores matching changes Michał Kępień
2020-10-16 15:32       ` Phillip Wood [this message]
2020-10-16 18:04         ` Junio C Hamano
2020-10-19  9:48           ` Michał Kępień
2020-10-16 18:16       ` Junio C Hamano
2020-10-19  9:55         ` Michał Kępień
2020-10-19 17:29           ` Junio C Hamano
2020-10-16 10:00     ` [PATCH v3 0/2] " Johannes Schindelin
2020-10-20  6:48     ` [PATCH v4 " Michał Kępień
2020-10-20  6:48       ` [PATCH v4 1/2] merge-base, xdiff: zero out xpparam_t structures Michał Kępień
2020-10-20  6:48       ` [PATCH v4 2/2] diff: add -I<regex> that ignores matching changes Michał Kępień
2021-02-05 14:13       ` [PATCH 1/2] diff: add an API for deferred freeing Ævar Arnfjörð Bjarmason
2021-02-10 16:00         ` Johannes Schindelin
2021-02-11  3:00           ` Ævar Arnfjörð Bjarmason
2021-02-11  9:40             ` Johannes Schindelin
2021-02-11 10:21               ` Jeff King
2021-02-11 10:45                 ` [PATCH v2 0/2] " Ævar Arnfjörð Bjarmason
2021-02-11 10:45                 ` [PATCH v2 1/2] " Ævar Arnfjörð Bjarmason
2021-02-11 10:45                 ` [PATCH v2 2/2] diff: plug memory leak from regcomp() on {log,diff} -I Ævar Arnfjörð Bjarmason
2021-02-05 14:13       ` [PATCH " Ævar Arnfjörð Bjarmason

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=afd3b1cf-b883-6df5-bea5-28f8e06d8702@gmail.com \
    --to=phillip.wood123@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=michal@isc.org \
    --cc=phillip.wood@dunelm.org.uk \
    --subject='Re: [PATCH v3 2/2] diff: add -I<regex> that ignores matching changes' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).