git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] Consolidate find_header logic into one function
@ 2021-12-27 18:26 John Cai via GitGitGadget
  2021-12-27 18:26 ` [PATCH 1/2] receive-pack.c: consolidate find header logic John Cai via GitGitGadget
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: John Cai via GitGitGadget @ 2021-12-27 18:26 UTC (permalink / raw)
  To: git; +Cc: John Cai

This addresses the NEEDSWORK comment in builtin/receive-pack.c:

 /**
   * NEEDSWORK: reuse find_commit_header() from jk/commit-author-parsing
   * after dropping "_commit" from its name and possibly moving it out
   * of commit.c
   **/


There is some duplicated logic between find_header and find_commit_header
that can be consolidated instead of having two places in the code that do
essentially the same thing. For the sake of simpler and more DRY code, use
find_commit_header and rename it to find_header since it is not limited to
finding headers for only commits.

This is my first attempt to address a NEEDSWORK issue, and I'm relatively
new to C so any feedback would be appreciated!

John Cai (2):
  receive-pack.c: consolidate find header logic
  commit.c: rename find_commit_header to find_header

 builtin/am.c                |  2 +-
 builtin/commit.c            |  2 +-
 builtin/receive-pack.c      | 48 ++++++++++++++++---------------------
 commit.c                    |  7 +++---
 commit.h                    |  2 +-
 gpg-interface.c             |  2 +-
 pretty.c                    |  2 +-
 sequencer.c                 |  2 +-
 t/helper/test-fast-rebase.c |  2 +-
 9 files changed, 32 insertions(+), 37 deletions(-)


base-commit: 2ae0a9cb8298185a94e5998086f380a355dd8907
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1125%2Fjohn-cai%2Fjc%2Freplace-find-header-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1125/john-cai/jc/replace-find-header-v1
Pull-Request: https://github.com/git/git/pull/1125
-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 1/2] receive-pack.c: consolidate find header logic
  2021-12-27 18:26 [PATCH 0/2] Consolidate find_header logic into one function John Cai via GitGitGadget
@ 2021-12-27 18:26 ` John Cai via GitGitGadget
  2021-12-27 22:33   ` Junio C Hamano
  2021-12-27 18:26 ` [PATCH 2/2] commit.c: rename find_commit_header to find_header John Cai via GitGitGadget
  2021-12-29  6:19 ` [PATCH v2] receive-pack.c: consolidate find header logic John Cai via GitGitGadget
  2 siblings, 1 reply; 16+ messages in thread
From: John Cai via GitGitGadget @ 2021-12-27 18:26 UTC (permalink / raw)
  To: git; +Cc: John Cai, John Cai

From: John Cai <johncai86@gmail.com>

There are two functions that have very similar logic of finding a header
value. find_commit_header, and find_header. We can conslidate the logic
by using find_commit_header and replacing the logic in find_header.
This helps clean up the code, as the logic for finding header values can
stay in one place.

Signed-off-by: John Cai <johncai86@gmail.com>
---
 builtin/receive-pack.c | 48 ++++++++++++++++++------------------------
 commit.c               |  3 ++-
 2 files changed, 23 insertions(+), 28 deletions(-)

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 4f92e6f059d..939d4b28b7c 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -581,32 +581,26 @@ static char *prepare_push_cert_nonce(const char *path, timestamp_t stamp)
 	return strbuf_detach(&buf, NULL);
 }
 
-/*
- * NEEDSWORK: reuse find_commit_header() from jk/commit-author-parsing
- * after dropping "_commit" from its name and possibly moving it out
- * of commit.c
- */
-static char *find_header(const char *msg, size_t len, const char *key,
-			 const char **next_line)
+static char *find_header_value(const char *msg, const char *key, const char **next_line)
 {
-	int key_len = strlen(key);
-	const char *line = msg;
-
-	while (line && line < msg + len) {
-		const char *eol = strchrnul(line, '\n');
-
-		if ((msg + len <= eol) || line == eol)
-			return NULL;
-		if (line + key_len < eol &&
-		    !memcmp(line, key, key_len) && line[key_len] == ' ') {
-			int offset = key_len + 1;
-			if (next_line)
-				*next_line = *eol ? eol + 1 : eol;
-			return xmemdupz(line + offset, (eol - line) - offset);
-		}
-		line = *eol ? eol + 1 : NULL;
+	size_t out_len;
+	const char *eol;
+	char *ret;
+
+	const char *val = find_commit_header(msg, key, &out_len);
+	if (val == NULL)
+		return NULL;
+
+	eol = strchrnul(val, '\n');
+	if (next_line) {
+		*next_line = *eol ? eol + 1: eol;
 	}
-	return NULL;
+
+	ret = xmalloc(out_len+1);
+	memcpy(ret, val, out_len);
+	ret[out_len] = '\0';
+
+	return ret;
 }
 
 /*
@@ -625,7 +619,8 @@ static int constant_memequal(const char *a, const char *b, size_t n)
 
 static const char *check_nonce(const char *buf, size_t len)
 {
-	char *nonce = find_header(buf, len, "nonce", NULL);
+	char *nonce = find_header_value(buf, "nonce", NULL);
+
 	timestamp_t stamp, ostamp;
 	char *bohmac, *expect = NULL;
 	const char *retval = NONCE_BAD;
@@ -730,8 +725,7 @@ static int check_cert_push_options(const struct string_list *push_options)
 	if (!len)
 		return 1;
 
-	while ((option = find_header(buf, len, "push-option", &next_line))) {
-		len -= (next_line - buf);
+	while ((option = find_header_value(buf, "push-option", &next_line))) {
 		buf = next_line;
 		options_seen++;
 		if (options_seen > push_options->nr
diff --git a/commit.c b/commit.c
index a348f085b2b..616a6780703 100644
--- a/commit.c
+++ b/commit.c
@@ -1645,7 +1645,8 @@ const char *find_commit_header(const char *msg, const char *key, size_t *out_len
 		if (eol - line > key_len &&
 		    !strncmp(line, key, key_len) &&
 		    line[key_len] == ' ') {
-			*out_len = eol - line - key_len - 1;
+			if (out_len != NULL)
+				*out_len = eol - line - key_len - 1;
 			return line + key_len + 1;
 		}
 		line = *eol ? eol + 1 : NULL;
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/2] commit.c: rename find_commit_header to find_header
  2021-12-27 18:26 [PATCH 0/2] Consolidate find_header logic into one function John Cai via GitGitGadget
  2021-12-27 18:26 ` [PATCH 1/2] receive-pack.c: consolidate find header logic John Cai via GitGitGadget
@ 2021-12-27 18:26 ` John Cai via GitGitGadget
  2021-12-29  6:19 ` [PATCH v2] receive-pack.c: consolidate find header logic John Cai via GitGitGadget
  2 siblings, 0 replies; 16+ messages in thread
From: John Cai via GitGitGadget @ 2021-12-27 18:26 UTC (permalink / raw)
  To: git; +Cc: John Cai, John Cai

From: John Cai <johncai86@gmail.com>

Since find_commit_header's logic is not just limited to commit objects,
we can rename this to a more general find_header function so it's
clearer that it can be used in a more general way.

Signed-off-by: John Cai <johncai86@gmail.com>
---
 builtin/am.c                | 2 +-
 builtin/commit.c            | 2 +-
 builtin/receive-pack.c      | 8 ++++----
 commit.c                    | 4 ++--
 commit.h                    | 2 +-
 gpg-interface.c             | 2 +-
 pretty.c                    | 2 +-
 sequencer.c                 | 2 +-
 t/helper/test-fast-rebase.c | 2 +-
 9 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/builtin/am.c b/builtin/am.c
index 8677ea2348a..1ed5071ff49 100644
--- a/builtin/am.c
+++ b/builtin/am.c
@@ -1312,7 +1312,7 @@ static void get_commit_info(struct am_state *state, struct commit *commit)
 
 	buffer = logmsg_reencode(commit, NULL, get_commit_output_encoding());
 
-	ident_line = find_commit_header(buffer, "author", &ident_len);
+	ident_line = find_header(buffer, "author", &ident_len);
 	if (!ident_line)
 		die(_("missing author line in commit %s"),
 		      oid_to_hex(&commit->object.oid));
diff --git a/builtin/commit.c b/builtin/commit.c
index 883c16256c8..6fbb154ee32 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -614,7 +614,7 @@ static void determine_author_info(struct strbuf *author_ident)
 		size_t len;
 		const char *a;
 
-		a = find_commit_header(author_message_buffer, "author", &len);
+		a = find_header(author_message_buffer, "author", &len);
 		if (!a)
 			die(_("commit '%s' lacks author header"), author_message);
 		if (split_ident_line(&ident, a, len) < 0)
diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 939d4b28b7c..f0fc826b665 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -581,13 +581,13 @@ static char *prepare_push_cert_nonce(const char *path, timestamp_t stamp)
 	return strbuf_detach(&buf, NULL);
 }
 
-static char *find_header_value(const char *msg, const char *key, const char **next_line)
+static char *find_header_nextline(const char *msg, const char *key, const char **next_line)
 {
 	size_t out_len;
 	const char *eol;
 	char *ret;
 
-	const char *val = find_commit_header(msg, key, &out_len);
+	const char *val = find_header(msg, key, &out_len);
 	if (val == NULL)
 		return NULL;
 
@@ -619,7 +619,7 @@ static int constant_memequal(const char *a, const char *b, size_t n)
 
 static const char *check_nonce(const char *buf, size_t len)
 {
-	char *nonce = find_header_value(buf, "nonce", NULL);
+	char *nonce = find_header_nextline(buf, "nonce", NULL);
 
 	timestamp_t stamp, ostamp;
 	char *bohmac, *expect = NULL;
@@ -725,7 +725,7 @@ static int check_cert_push_options(const struct string_list *push_options)
 	if (!len)
 		return 1;
 
-	while ((option = find_header_value(buf, "push-option", &next_line))) {
+	while ((option = find_header_nextline(buf, "push-option", &next_line))) {
 		buf = next_line;
 		options_seen++;
 		if (options_seen > push_options->nr
diff --git a/commit.c b/commit.c
index 616a6780703..bdedbe295f5 100644
--- a/commit.c
+++ b/commit.c
@@ -734,7 +734,7 @@ void record_author_date(struct author_date_slab *author_date,
 	char *date_end;
 	timestamp_t date;
 
-	ident_line = find_commit_header(buffer, "author", &ident_len);
+	ident_line = find_header(buffer, "author", &ident_len);
 	if (!ident_line)
 		goto fail_exit; /* no author line */
 	if (split_ident_line(&ident, ident_line, ident_len) ||
@@ -1631,7 +1631,7 @@ struct commit_list **commit_list_append(struct commit *commit,
 	return &new_commit->next;
 }
 
-const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
+const char *find_header(const char *msg, const char *key, size_t *out_len)
 {
 	int key_len = strlen(key);
 	const char *line = msg;
diff --git a/commit.h b/commit.h
index 3ea32766bcb..18a1460c4b4 100644
--- a/commit.h
+++ b/commit.h
@@ -296,7 +296,7 @@ void free_commit_extra_headers(struct commit_extra_header *extra);
  * Note that some headers (like mergetag) may be multi-line. It is the caller's
  * responsibility to parse further in this case!
  */
-const char *find_commit_header(const char *msg, const char *key,
+const char *find_header(const char *msg, const char *key,
 			       size_t *out_len);
 
 /* Find the end of the log message, the right place for a new trailer. */
diff --git a/gpg-interface.c b/gpg-interface.c
index b52eb0e2e04..c1bc9fa0459 100644
--- a/gpg-interface.c
+++ b/gpg-interface.c
@@ -597,7 +597,7 @@ static int parse_payload_metadata(struct signature_check *sigc)
 		BUG("invalid value for sigc->payload_type");
 	}
 
-	ident_line = find_commit_header(sigc->payload, signer_header, &ident_len);
+	ident_line = find_header(sigc->payload, signer_header, &ident_len);
 	if (!ident_line || !ident_len)
 		return 1;
 
diff --git a/pretty.c b/pretty.c
index ee6114e3f0a..306c647d2d7 100644
--- a/pretty.c
+++ b/pretty.c
@@ -633,7 +633,7 @@ static void add_merge_info(const struct pretty_print_context *pp,
 static char *get_header(const char *msg, const char *key)
 {
 	size_t len;
-	const char *v = find_commit_header(msg, key, &len);
+	const char *v = find_header(msg, key, &len);
 	return v ? xmemdupz(v, len) : NULL;
 }
 
diff --git a/sequencer.c b/sequencer.c
index e314af4d60a..ac535868121 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -912,7 +912,7 @@ static char *get_author(const char *message)
 	size_t len;
 	const char *a;
 
-	a = find_commit_header(message, "author", &len);
+	a = find_header(message, "author", &len);
 	if (a)
 		return xmemdupz(a, len);
 
diff --git a/t/helper/test-fast-rebase.c b/t/helper/test-fast-rebase.c
index fc2d4609043..576c2366258 100644
--- a/t/helper/test-fast-rebase.c
+++ b/t/helper/test-fast-rebase.c
@@ -44,7 +44,7 @@ static char *get_author(const char *message)
 	size_t len;
 	const char *a;
 
-	a = find_commit_header(message, "author", &len);
+	a = find_header(message, "author", &len);
 	if (a)
 		return xmemdupz(a, len);
 
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] receive-pack.c: consolidate find header logic
  2021-12-27 18:26 ` [PATCH 1/2] receive-pack.c: consolidate find header logic John Cai via GitGitGadget
@ 2021-12-27 22:33   ` Junio C Hamano
  0 siblings, 0 replies; 16+ messages in thread
From: Junio C Hamano @ 2021-12-27 22:33 UTC (permalink / raw)
  To: John Cai via GitGitGadget; +Cc: git, John Cai

"John Cai via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: John Cai <johncai86@gmail.com>
>
> There are two functions that have very similar logic of finding a header
> value. find_commit_header, and find_header. We can conslidate the logic

"consolidate".

> by using find_commit_header and replacing the logic in find_header.
> This helps clean up the code, as the logic for finding header values can
> stay in one place.

It does make sense to split the renaming and this change into two
separate steps like this series does.  The renaming done in 2/2
however makes readers wonder if our existing code paths that handle
tag objects becomes cleaner by using the function (and if not, if
the perceived benefit of making it into a more generic name is a
mere mirage), though.

> Signed-off-by: John Cai <johncai86@gmail.com>
> ---
>  builtin/receive-pack.c | 48 ++++++++++++++++++------------------------
>  commit.c               |  3 ++-
>  2 files changed, 23 insertions(+), 28 deletions(-)
>
> diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
> index 4f92e6f059d..939d4b28b7c 100644
> --- a/builtin/receive-pack.c
> +++ b/builtin/receive-pack.c
> @@ -581,32 +581,26 @@ static char *prepare_push_cert_nonce(const char *path, timestamp_t stamp)
>  	return strbuf_detach(&buf, NULL);
>  }
>  
> -/*
> - * NEEDSWORK: reuse find_commit_header() from jk/commit-author-parsing
> - * after dropping "_commit" from its name and possibly moving it out
> - * of commit.c
> - */
> -static char *find_header(const char *msg, size_t len, const char *key,
> -			 const char **next_line)
> +static char *find_header_value(const char *msg, const char *key, const char **next_line)

I do not think this change is quite right.  &msg[len] in the
original may not be the end of a NUL-terminated string, i.e. the
caller does not want this helper to scan through to the end of the
buffer but wants it to stop much earlier at the "len" the caller
specifies.

>  {
> -	int key_len = strlen(key);
> -	const char *line = msg;
> -
> -	while (line && line < msg + len) {
> -		const char *eol = strchrnul(line, '\n');
> -
> -		if ((msg + len <= eol) || line == eol)
> -			return NULL;
> -		if (line + key_len < eol &&
> -		    !memcmp(line, key, key_len) && line[key_len] == ' ') {
> -			int offset = key_len + 1;
> -			if (next_line)
> -				*next_line = *eol ? eol + 1 : eol;
> -			return xmemdupz(line + offset, (eol - line) - offset);
> -		}
> -		line = *eol ? eol + 1 : NULL;
> +	size_t out_len;
> +	const char *eol;
> +	char *ret;
> +
> +	const char *val = find_commit_header(msg, key, &out_len);
> +	if (val == NULL)
> +		return NULL;
> +
> +	eol = strchrnul(val, '\n');
> +	if (next_line) {
> +		*next_line = *eol ? eol + 1: eol;

Also, find_commit_header() has already figured out what the next
line should be.  If it is not just telling us, we are forced to
recompute it with an extra strchrnul(), but is that really the case?

HOWEVER.

Doesn't out_len have enough information to let us compute next_line
without scanning the line again?

> -	return NULL;
> +
> +	ret = xmalloc(out_len+1);
> +	memcpy(ret, val, out_len);
> +	ret[out_len] = '\0';

In any case, it is not necessary to open code xmemdupz() into these
three lines, no?

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v2] receive-pack.c: consolidate find header logic
  2021-12-27 18:26 [PATCH 0/2] Consolidate find_header logic into one function John Cai via GitGitGadget
  2021-12-27 18:26 ` [PATCH 1/2] receive-pack.c: consolidate find header logic John Cai via GitGitGadget
  2021-12-27 18:26 ` [PATCH 2/2] commit.c: rename find_commit_header to find_header John Cai via GitGitGadget
@ 2021-12-29  6:19 ` John Cai via GitGitGadget
  2021-12-30 23:01   ` Junio C Hamano
  2021-12-31  6:17   ` [PATCH v3] " John Cai via GitGitGadget
  2 siblings, 2 replies; 16+ messages in thread
From: John Cai via GitGitGadget @ 2021-12-29  6:19 UTC (permalink / raw)
  To: git; +Cc: John Cai, John Cai

From: John Cai <johncai86@gmail.com>

There are two functions that have very similar logic of finding a header
value. find_commit_header, and find_header. We can conslidate the logic
by using find_commit_header and replacing the logic in find_header.

Introduce a new function find_header_max, which is equivalent to
find_commit_header except it takes a len parameter that determines how
many bytes to read. find_commit_header can then call find_header_max
with 0 as the len.

This cleans up duplicate logic, as the logic for finding header values
is now all in one place.

Signed-off-by: John Cai <johncai86@gmail.com>
---
    Consolidate find_header logic into one function
    
    This addresses the NEEDSWORK comment in builtin/receive-pack.c:
    
     /**
       * NEEDSWORK: reuse find_commit_header() from jk/commit-author-parsing
       * after dropping "_commit" from its name and possibly moving it out
       * of commit.c
       **/
    
    
    There is some duplicated logic between find_header and
    find_commit_header that can be consolidated instead of having two places
    in the code that do essentially the same thing. For the sake of simpler
    and more DRY code, use find_commit_header and rename it to find_header
    since it is not limited to finding headers for only commits.
    
    Changes since v1:
    
     * got rid of renaming from find_commit_header -> find_header since the
       renaming did not provide much value
     * simplified logic in find_header
     * introduce find_header_max function, which is what find_commit_header
       was before except it adds a "len" parameter to determine how much of
       the buffer to read.

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1125%2Fjohn-cai%2Fjc%2Freplace-find-header-v2
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1125/john-cai/jc/replace-find-header-v2
Pull-Request: https://github.com/git/git/pull/1125

Range-diff vs v1:

 1:  9465c20d4bd ! 1:  5e0d90d642b receive-pack.c: consolidate find header logic
     @@ Commit message
          There are two functions that have very similar logic of finding a header
          value. find_commit_header, and find_header. We can conslidate the logic
          by using find_commit_header and replacing the logic in find_header.
     -    This helps clean up the code, as the logic for finding header values can
     -    stay in one place.
     +
     +    Introduce a new function find_header_max, which is equivalent to
     +    find_commit_header except it takes a len parameter that determines how
     +    many bytes to read. find_commit_header can then call find_header_max
     +    with 0 as the len.
     +
     +    This cleans up duplicate logic, as the logic for finding header values
     +    is now all in one place.
      
          Signed-off-by: John Cai <johncai86@gmail.com>
      
     @@ builtin/receive-pack.c: static char *prepare_push_cert_nonce(const char *path, t
      - * after dropping "_commit" from its name and possibly moving it out
      - * of commit.c
      - */
     --static char *find_header(const char *msg, size_t len, const char *key,
     --			 const char **next_line)
     -+static char *find_header_value(const char *msg, const char *key, const char **next_line)
     + static char *find_header(const char *msg, size_t len, const char *key,
     + 			 const char **next_line)
       {
      -	int key_len = strlen(key);
      -	const char *line = msg;
     @@ builtin/receive-pack.c: static char *prepare_push_cert_nonce(const char *path, t
      -			return xmemdupz(line + offset, (eol - line) - offset);
      -		}
      -		line = *eol ? eol + 1 : NULL;
     +-	}
     +-	return NULL;
      +	size_t out_len;
     -+	const char *eol;
     -+	char *ret;
     ++	const char *val = find_header_max(msg, key, len, &out_len);
      +
     -+	const char *val = find_commit_header(msg, key, &out_len);
      +	if (val == NULL)
      +		return NULL;
      +
     -+	eol = strchrnul(val, '\n');
     -+	if (next_line) {
     -+		*next_line = *eol ? eol + 1: eol;
     - 	}
     --	return NULL;
     -+
     -+	ret = xmalloc(out_len+1);
     -+	memcpy(ret, val, out_len);
     -+	ret[out_len] = '\0';
     ++	if (next_line)
     ++		*next_line = val + out_len + 1;
      +
     -+	return ret;
     ++	return xmemdupz(val, out_len);
       }
       
       /*
     -@@ builtin/receive-pack.c: static int constant_memequal(const char *a, const char *b, size_t n)
     +
     + ## commit.c ##
     +@@ commit.c: struct commit_list **commit_list_append(struct commit *commit,
     + 	return &new_commit->next;
     + }
       
     - static const char *check_nonce(const char *buf, size_t len)
     +-const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
     ++const char *find_header_max(const char *msg, const char *key,
     ++			size_t len,
     ++			size_t *out_len)
       {
     --	char *nonce = find_header(buf, len, "nonce", NULL);
     -+	char *nonce = find_header_value(buf, "nonce", NULL);
     -+
     - 	timestamp_t stamp, ostamp;
     - 	char *bohmac, *expect = NULL;
     - 	const char *retval = NONCE_BAD;
     -@@ builtin/receive-pack.c: static int check_cert_push_options(const struct string_list *push_options)
     - 	if (!len)
     - 		return 1;
     + 	int key_len = strlen(key);
     + 	const char *line = msg;
       
     --	while ((option = find_header(buf, len, "push-option", &next_line))) {
     --		len -= (next_line - buf);
     -+	while ((option = find_header_value(buf, "push-option", &next_line))) {
     - 		buf = next_line;
     - 		options_seen++;
     - 		if (options_seen > push_options->nr
     -
     - ## commit.c ##
     +-	while (line) {
     ++	while (line && (len == 0 || line < msg + len)) {
     + 		const char *eol = strchrnul(line, '\n');
     + 
     + 		if (line == eol)
      @@ commit.c: const char *find_commit_header(const char *msg, const char *key, size_t *out_len
     - 		if (eol - line > key_len &&
     - 		    !strncmp(line, key, key_len) &&
     - 		    line[key_len] == ' ') {
     --			*out_len = eol - line - key_len - 1;
     -+			if (out_len != NULL)
     -+				*out_len = eol - line - key_len - 1;
     - 			return line + key_len + 1;
     - 		}
     - 		line = *eol ? eol + 1 : NULL;
     + 	return NULL;
     + }
     + 
     ++const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
     ++{
     ++	return find_header_max(msg, key, 0, out_len);
     ++}
     + /*
     +  * Inspect the given string and determine the true "end" of the log message, in
     +  * order to find where to put a new Signed-off-by trailer.  Ignored are
     +
     + ## commit.h ##
     +@@ commit.h: void free_commit_extra_headers(struct commit_extra_header *extra);
     + 
     + /*
     +  * Search the commit object contents given by "msg" for the header "key".
     ++ * Reads up to "len" bytes of "msg".
     +  * Returns a pointer to the start of the header contents, or NULL. The length
     +  * of the header, up to the first newline, is returned via out_len.
     +  *
     +  * Note that some headers (like mergetag) may be multi-line. It is the caller's
     +  * responsibility to parse further in this case!
     +  */
     ++const char *find_header_max(const char *msg, const char *key,
     ++			size_t len,
     ++			size_t *out_len);
     ++
     + const char *find_commit_header(const char *msg, const char *key,
     + 			       size_t *out_len);
     + 
 2:  384a635daa2 < -:  ----------- commit.c: rename find_commit_header to find_header


 builtin/receive-pack.c | 33 ++++++++++-----------------------
 commit.c               | 10 ++++++++--
 commit.h               |  5 +++++
 3 files changed, 23 insertions(+), 25 deletions(-)

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 9f4a0b816cf..b69ead8dcda 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -581,32 +581,19 @@ static char *prepare_push_cert_nonce(const char *path, timestamp_t stamp)
 	return strbuf_detach(&buf, NULL);
 }
 
-/*
- * NEEDSWORK: reuse find_commit_header() from jk/commit-author-parsing
- * after dropping "_commit" from its name and possibly moving it out
- * of commit.c
- */
 static char *find_header(const char *msg, size_t len, const char *key,
 			 const char **next_line)
 {
-	int key_len = strlen(key);
-	const char *line = msg;
-
-	while (line && line < msg + len) {
-		const char *eol = strchrnul(line, '\n');
-
-		if ((msg + len <= eol) || line == eol)
-			return NULL;
-		if (line + key_len < eol &&
-		    !memcmp(line, key, key_len) && line[key_len] == ' ') {
-			int offset = key_len + 1;
-			if (next_line)
-				*next_line = *eol ? eol + 1 : eol;
-			return xmemdupz(line + offset, (eol - line) - offset);
-		}
-		line = *eol ? eol + 1 : NULL;
-	}
-	return NULL;
+	size_t out_len;
+	const char *val = find_header_max(msg, key, len, &out_len);
+
+	if (val == NULL)
+		return NULL;
+
+	if (next_line)
+		*next_line = val + out_len + 1;
+
+	return xmemdupz(val, out_len);
 }
 
 /*
diff --git a/commit.c b/commit.c
index a348f085b2b..2ed115e04a0 100644
--- a/commit.c
+++ b/commit.c
@@ -1631,12 +1631,14 @@ struct commit_list **commit_list_append(struct commit *commit,
 	return &new_commit->next;
 }
 
-const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
+const char *find_header_max(const char *msg, const char *key,
+			size_t len,
+			size_t *out_len)
 {
 	int key_len = strlen(key);
 	const char *line = msg;
 
-	while (line) {
+	while (line && (len == 0 || line < msg + len)) {
 		const char *eol = strchrnul(line, '\n');
 
 		if (line == eol)
@@ -1653,6 +1655,10 @@ const char *find_commit_header(const char *msg, const char *key, size_t *out_len
 	return NULL;
 }
 
+const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
+{
+	return find_header_max(msg, key, 0, out_len);
+}
 /*
  * Inspect the given string and determine the true "end" of the log message, in
  * order to find where to put a new Signed-off-by trailer.  Ignored are
diff --git a/commit.h b/commit.h
index 3ea32766bcb..41ec89af5b5 100644
--- a/commit.h
+++ b/commit.h
@@ -290,12 +290,17 @@ void free_commit_extra_headers(struct commit_extra_header *extra);
 
 /*
  * Search the commit object contents given by "msg" for the header "key".
+ * Reads up to "len" bytes of "msg".
  * Returns a pointer to the start of the header contents, or NULL. The length
  * of the header, up to the first newline, is returned via out_len.
  *
  * Note that some headers (like mergetag) may be multi-line. It is the caller's
  * responsibility to parse further in this case!
  */
+const char *find_header_max(const char *msg, const char *key,
+			size_t len,
+			size_t *out_len);
+
 const char *find_commit_header(const char *msg, const char *key,
 			       size_t *out_len);
 

base-commit: 55b058a8bbcc54bd93c733035c995abc7967e539
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] receive-pack.c: consolidate find header logic
  2021-12-29  6:19 ` [PATCH v2] receive-pack.c: consolidate find header logic John Cai via GitGitGadget
@ 2021-12-30 23:01   ` Junio C Hamano
  2021-12-31  6:17   ` [PATCH v3] " John Cai via GitGitGadget
  1 sibling, 0 replies; 16+ messages in thread
From: Junio C Hamano @ 2021-12-30 23:01 UTC (permalink / raw)
  To: John Cai via GitGitGadget; +Cc: git, John Cai

"John Cai via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: John Cai <johncai86@gmail.com>
>
> There are two functions that have very similar logic of finding a header
> value. find_commit_header, and find_header. We can conslidate the logic
> by using find_commit_header and replacing the logic in find_header.
>
> Introduce a new function find_header_max, which is equivalent to
> find_commit_header except it takes a len parameter that determines how
> many bytes to read. find_commit_header can then call find_header_max
> with 0 as the len.

find_header_max() is not the name of the function that finds the
largest header?  That is misleading.

<git-compat-util.h> defines a few helper functions that take a
counted string, and they are named with _mem() suffix after the name
of their NUL-terminated counterparts (if exists).  skip_prefix() has
skip_prefix_mem(), strip_suffix() has strip_suffix_mem().

find_header_mem() or something along that line, perhaps?

> diff --git a/commit.c b/commit.c
> index a348f085b2b..2ed115e04a0 100644
> --- a/commit.c
> +++ b/commit.c
> @@ -1631,12 +1631,14 @@ struct commit_list **commit_list_append(struct commit *commit,
>  	return &new_commit->next;
>  }
>  
> -const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
> +const char *find_header_max(const char *msg, const char *key,
> +			size_t len,
> +			size_t *out_len)

If <len> is meant to be the length part of <ptr,len> pair, we should
have it immediately after the <ptr> parameter.

	find_header_mem(const char *msg, size_t len,
        		const char *key, size_t *out_len)

That makes it clear to the readers that <msg, len> are close friends.

Also, I have a feeling that ...

>  {
>  	int key_len = strlen(key);
>  	const char *line = msg;
>  
> -	while (line) {
> +	while (line && (len == 0 || line < msg + len)) {

... we do not want this special casing of "if !len".  By making the
caller responsible to _always_ supply the length of msg, we can lose
the conditional.

>  		const char *eol = strchrnul(line, '\n');
>  
>  		if (line == eol)
> @@ -1653,6 +1655,10 @@ const char *find_commit_header(const char *msg, const char *key, size_t *out_len
>  	return NULL;
>  }
>  
> +const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
> +{
> +	return find_header_max(msg, key, 0, out_len);

I.e. find_header_mem(msg, strlen(msg), key, out_len);

> diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
> index 9f4a0b816cf..b69ead8dcda 100644
> --- a/builtin/receive-pack.c
> +++ b/builtin/receive-pack.c
> @@ -581,32 +581,19 @@ static char *prepare_push_cert_nonce(const char *path, timestamp_t stamp)
>  	return strbuf_detach(&buf, NULL);
>  }
>  
> -/*
> - * NEEDSWORK: reuse find_commit_header() from jk/commit-author-parsing
> - * after dropping "_commit" from its name and possibly moving it out
> - * of commit.c
> - */
>  static char *find_header(const char *msg, size_t len, const char *key,
>  			 const char **next_line)
>  {
> +	size_t out_len;
> +	const char *val = find_header_max(msg, key, len, &out_len);
> +
> +	if (val == NULL)
> +		return NULL;
> +
> +	if (next_line)
> +		*next_line = val + out_len + 1;
> +
> +	return xmemdupz(val, out_len);
>  }

Yup, something along that line.  Note that find_header() does make
it clear that the <msg, len> parameters form a pair.  We want to do
the same for the new helper.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v3] receive-pack.c: consolidate find header logic
  2021-12-29  6:19 ` [PATCH v2] receive-pack.c: consolidate find header logic John Cai via GitGitGadget
  2021-12-30 23:01   ` Junio C Hamano
@ 2021-12-31  6:17   ` John Cai via GitGitGadget
  2022-01-04  1:56     ` Junio C Hamano
  2022-01-05 15:21     ` [PATCH v4] " John Cai via GitGitGadget
  1 sibling, 2 replies; 16+ messages in thread
From: John Cai via GitGitGadget @ 2021-12-31  6:17 UTC (permalink / raw)
  To: git; +Cc: John Cai, John Cai

From: John Cai <johncai86@gmail.com>

There are two functions that have very similar logic of finding a header
value. find_commit_header, and find_header. We can conslidate the logic
by using find_commit_header and replacing the logic in find_header.

Introduce a new function find_header_max, which is equivalent to
find_commit_header except it takes a len parameter that determines how
many bytes to read. find_commit_header can then call find_header_max
with 0 as the len.

This cleans up duplicate logic, as the logic for finding header values
is now all in one place.

Signed-off-by: John Cai <johncai86@gmail.com>
---
    Consolidate find_header logic into one function
    
    This addresses the NEEDSWORK comment in builtin/receive-pack.c:
    
     /**
       * NEEDSWORK: reuse find_commit_header() from jk/commit-author-parsing
       * after dropping "_commit" from its name and possibly moving it out
       * of commit.c
       **/
    
    
    There is some duplicated logic between find_header and
    find_commit_header that can be consolidated instead of having two places
    in the code that do essentially the same thing. For the sake of simpler
    and more DRY code, use find_commit_header and rename it to find_header
    since it is not limited to finding headers for only commits.
    
    Changes since v2:
    
     * s/find_header_max/find_header_mem
     * moved "len" argument right next to "msg" agument in find_header_mem
     * removed special condition in find_header_mem to check for line == 0

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1125%2Fjohn-cai%2Fjc%2Freplace-find-header-v3
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1125/john-cai/jc/replace-find-header-v3
Pull-Request: https://github.com/git/git/pull/1125

Range-diff vs v2:

 1:  5e0d90d642b ! 1:  463df42e7e0 receive-pack.c: consolidate find header logic
     @@ builtin/receive-pack.c: static char *prepare_push_cert_nonce(const char *path, t
      -	}
      -	return NULL;
      +	size_t out_len;
     -+	const char *val = find_header_max(msg, key, len, &out_len);
     ++	const char *val = find_header_mem(msg, len, key, &out_len);
      +
      +	if (val == NULL)
      +		return NULL;
     @@ commit.c: struct commit_list **commit_list_append(struct commit *commit,
       }
       
      -const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
     -+const char *find_header_max(const char *msg, const char *key,
     -+			size_t len,
     -+			size_t *out_len)
     ++const char *find_header_mem(const char *msg, size_t len,
     ++			const char *key, size_t *out_len)
       {
       	int key_len = strlen(key);
       	const char *line = msg;
       
      -	while (line) {
     -+	while (line && (len == 0 || line < msg + len)) {
     ++	while (line && line < msg + len) {
       		const char *eol = strchrnul(line, '\n');
       
       		if (line == eol)
     @@ commit.c: const char *find_commit_header(const char *msg, const char *key, size_
       
      +const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
      +{
     -+	return find_header_max(msg, key, 0, out_len);
     ++	return find_header_mem(msg, strlen(msg), key, out_len);
      +}
       /*
        * Inspect the given string and determine the true "end" of the log message, in
     @@ commit.h: void free_commit_extra_headers(struct commit_extra_header *extra);
        * Note that some headers (like mergetag) may be multi-line. It is the caller's
        * responsibility to parse further in this case!
        */
     -+const char *find_header_max(const char *msg, const char *key,
     -+			size_t len,
     ++const char *find_header_mem(const char *msg, size_t len,
     ++			const char *key,
      +			size_t *out_len);
      +
       const char *find_commit_header(const char *msg, const char *key,


 builtin/receive-pack.c | 33 ++++++++++-----------------------
 commit.c               |  9 +++++++--
 commit.h               |  5 +++++
 3 files changed, 22 insertions(+), 25 deletions(-)

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 9f4a0b816cf..b661c51a538 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -581,32 +581,19 @@ static char *prepare_push_cert_nonce(const char *path, timestamp_t stamp)
 	return strbuf_detach(&buf, NULL);
 }
 
-/*
- * NEEDSWORK: reuse find_commit_header() from jk/commit-author-parsing
- * after dropping "_commit" from its name and possibly moving it out
- * of commit.c
- */
 static char *find_header(const char *msg, size_t len, const char *key,
 			 const char **next_line)
 {
-	int key_len = strlen(key);
-	const char *line = msg;
-
-	while (line && line < msg + len) {
-		const char *eol = strchrnul(line, '\n');
-
-		if ((msg + len <= eol) || line == eol)
-			return NULL;
-		if (line + key_len < eol &&
-		    !memcmp(line, key, key_len) && line[key_len] == ' ') {
-			int offset = key_len + 1;
-			if (next_line)
-				*next_line = *eol ? eol + 1 : eol;
-			return xmemdupz(line + offset, (eol - line) - offset);
-		}
-		line = *eol ? eol + 1 : NULL;
-	}
-	return NULL;
+	size_t out_len;
+	const char *val = find_header_mem(msg, len, key, &out_len);
+
+	if (val == NULL)
+		return NULL;
+
+	if (next_line)
+		*next_line = val + out_len + 1;
+
+	return xmemdupz(val, out_len);
 }
 
 /*
diff --git a/commit.c b/commit.c
index a348f085b2b..8ac32a4d7b5 100644
--- a/commit.c
+++ b/commit.c
@@ -1631,12 +1631,13 @@ struct commit_list **commit_list_append(struct commit *commit,
 	return &new_commit->next;
 }
 
-const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
+const char *find_header_mem(const char *msg, size_t len,
+			const char *key, size_t *out_len)
 {
 	int key_len = strlen(key);
 	const char *line = msg;
 
-	while (line) {
+	while (line && line < msg + len) {
 		const char *eol = strchrnul(line, '\n');
 
 		if (line == eol)
@@ -1653,6 +1654,10 @@ const char *find_commit_header(const char *msg, const char *key, size_t *out_len
 	return NULL;
 }
 
+const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
+{
+	return find_header_mem(msg, strlen(msg), key, out_len);
+}
 /*
  * Inspect the given string and determine the true "end" of the log message, in
  * order to find where to put a new Signed-off-by trailer.  Ignored are
diff --git a/commit.h b/commit.h
index 3ea32766bcb..38cc5426615 100644
--- a/commit.h
+++ b/commit.h
@@ -290,12 +290,17 @@ void free_commit_extra_headers(struct commit_extra_header *extra);
 
 /*
  * Search the commit object contents given by "msg" for the header "key".
+ * Reads up to "len" bytes of "msg".
  * Returns a pointer to the start of the header contents, or NULL. The length
  * of the header, up to the first newline, is returned via out_len.
  *
  * Note that some headers (like mergetag) may be multi-line. It is the caller's
  * responsibility to parse further in this case!
  */
+const char *find_header_mem(const char *msg, size_t len,
+			const char *key,
+			size_t *out_len);
+
 const char *find_commit_header(const char *msg, const char *key,
 			       size_t *out_len);
 

base-commit: 55b058a8bbcc54bd93c733035c995abc7967e539
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v3] receive-pack.c: consolidate find header logic
  2021-12-31  6:17   ` [PATCH v3] " John Cai via GitGitGadget
@ 2022-01-04  1:56     ` Junio C Hamano
  2022-01-04 15:12       ` John Cai
  2022-01-05 15:21     ` [PATCH v4] " John Cai via GitGitGadget
  1 sibling, 1 reply; 16+ messages in thread
From: Junio C Hamano @ 2022-01-04  1:56 UTC (permalink / raw)
  To: John Cai via GitGitGadget; +Cc: git, John Cai

"John Cai via GitGitGadget" <gitgitgadget@gmail.com> writes:

> +	size_t out_len;
> +	const char *val = find_header_mem(msg, len, key, &out_len);
> +
> +	if (val == NULL)

Style:

	if (!val)

> +		return NULL;
> +
> +	if (next_line)
> +		*next_line = val + out_len + 1;
> +
> +	return xmemdupz(val, out_len);
>  }
>  
>  /*
> diff --git a/commit.c b/commit.c
> index a348f085b2b..8ac32a4d7b5 100644
> --- a/commit.c
> +++ b/commit.c
> @@ -1631,12 +1631,13 @@ struct commit_list **commit_list_append(struct commit *commit,
>  	return &new_commit->next;
>  }
>  
> -const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
> +const char *find_header_mem(const char *msg, size_t len,
> +			const char *key, size_t *out_len)
>  {
>  	int key_len = strlen(key);
>  	const char *line = msg;
>  
> +	while (line && line < msg + len) {
>  		const char *eol = strchrnul(line, '\n');

Between line[0] and msg[len], there may not be a LF nor NUL at all,
and strchrnul() will scan beyond the range we were given (which is
msg[0]..msg[len]).

But that is something we share with the find_header() if I am not
mistaken, so I am OK to leave the code as posted and leave it
outside the scope of this series to clean it up to make it safer.

The reason why it is probably safe for the current set of callers
(and presumably any reasonable new callers we may add later) is that
they computed "len" by scanning the block of memory starting at (or
before) "msg" before calling us, and we know that the block of
memory is properly NUL-terminated.  find_header() is called by
check_nonce() and check_cert_push_options(), both of which tell
us to scan in a strbuf, which is designed to be scannable for NUL
safely by having an extra NUL at the end beyond the end.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3] receive-pack.c: consolidate find header logic
  2022-01-04  1:56     ` Junio C Hamano
@ 2022-01-04 15:12       ` John Cai
  0 siblings, 0 replies; 16+ messages in thread
From: John Cai @ 2022-01-04 15:12 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: John Cai via GitGitGadget, git



> On Jan 3, 2022, at 8:56 PM, Junio C Hamano <gitster@pobox.com> wrote:
> 
> "John Cai via GitGitGadget" <gitgitgadget@gmail.com> writes:
> 
>> +	size_t out_len;
>> +	const char *val = find_header_mem(msg, len, key, &out_len);
>> +
>> +	if (val == NULL)
> 
> Style:
> 
> 	if (!val)
> 
>> +		return NULL;
>> +
>> +	if (next_line)
>> +		*next_line = val + out_len + 1;
>> +
>> +	return xmemdupz(val, out_len);
>> }
>> 
>> /*
>> diff --git a/commit.c b/commit.c
>> index a348f085b2b..8ac32a4d7b5 100644
>> --- a/commit.c
>> +++ b/commit.c
>> @@ -1631,12 +1631,13 @@ struct commit_list **commit_list_append(struct commit *commit,
>> 	return &new_commit->next;
>> }
>> 
>> -const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
>> +const char *find_header_mem(const char *msg, size_t len,
>> +			const char *key, size_t *out_len)
>> {
>> 	int key_len = strlen(key);
>> 	const char *line = msg;
>> 
>> +	while (line && line < msg + len) {
>> 		const char *eol = strchrnul(line, '\n');
> 
> Between line[0] and msg[len], there may not be a LF nor NUL at all,
> and strchrnul() will scan beyond the range we were given (which is
> msg[0]..msg[len]).
> 
> But that is something we share with the find_header() if I am not
> mistaken, so I am OK to leave the code as posted and leave it
> outside the scope of this series to clean it up to make it safer.

Good catch. Thanks for pointing that out-I didn’t notice it. I’ve added a NEEDSWORK
Block to this so we can address it in a later patch series.

> 
> The reason why it is probably safe for the current set of callers
> (and presumably any reasonable new callers we may add later) is that
> they computed "len" by scanning the block of memory starting at (or
> before) "msg" before calling us, and we know that the block of
> memory is properly NUL-terminated.  find_header() is called by
> check_nonce() and check_cert_push_options(), both of which tell
> us to scan in a strbuf, which is designed to be scannable for NUL
> safely by having an extra NUL at the end beyond the end.
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v4] receive-pack.c: consolidate find header logic
  2021-12-31  6:17   ` [PATCH v3] " John Cai via GitGitGadget
  2022-01-04  1:56     ` Junio C Hamano
@ 2022-01-05 15:21     ` John Cai via GitGitGadget
  2022-01-05 20:10       ` Junio C Hamano
  2022-01-06  0:51       ` [PATCH v5] " John Cai via GitGitGadget
  1 sibling, 2 replies; 16+ messages in thread
From: John Cai via GitGitGadget @ 2022-01-05 15:21 UTC (permalink / raw)
  To: git; +Cc: John Cai, John Cai

From: John Cai <johncai86@gmail.com>

There are two functions that have very similar logic of finding a header
value. find_commit_header, and find_header. We can conslidate the logic
by introducing a new function find_header_mem, which is equivalent to
find_commit_header except it takes a len parameter that determines how
many bytes will be read. find_commit_header and find_header can then both
call find_header_mem.

This reduces duplicate logic, as the logic for finding header values
can now all live in one place.

Signed-off-by: John Cai <johncai86@gmail.com>
---
    Consolidate find_header logic into one function
    
    This addresses the NEEDSWORK comment in builtin/receive-pack.c:
    
     /**
       * NEEDSWORK: reuse find_commit_header() from jk/commit-author-parsing
       * after dropping "_commit" from its name and possibly moving it out
       * of commit.c
       **/
    
    
    There are two functions that have very similar logic of finding a header
    value. find_commit_header, and find_header. We can conslidate the logic
    by introducing a new function find_header_mem, which is equivalent to
    find_commit_header except it takes a len parameter that determines how
    many bytes will be read. find_commit_header and find_header can then
    both call find_header_mem.
    
    This reduces duplicate logic, as the logic for finding header values can
    now all live in one place.
    
    Changes since v4:
    
     * added NEEDSWORK block detailing what needs to be done to clean up
       find_header_mem
    
    Changes since v3:
    
     * fixed verbiage in commit message
     * adjusted style of an if block (based on Junio's feedback)

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1125%2Fjohn-cai%2Fjc%2Freplace-find-header-v4
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1125/john-cai/jc/replace-find-header-v4
Pull-Request: https://github.com/git/git/pull/1125

Range-diff vs v3:

 1:  463df42e7e0 ! 1:  a7b00022b00 receive-pack.c: consolidate find header logic
     @@ Commit message
      
          There are two functions that have very similar logic of finding a header
          value. find_commit_header, and find_header. We can conslidate the logic
     -    by using find_commit_header and replacing the logic in find_header.
     -
     -    Introduce a new function find_header_max, which is equivalent to
     +    by introducing a new function find_header_mem, which is equivalent to
          find_commit_header except it takes a len parameter that determines how
     -    many bytes to read. find_commit_header can then call find_header_max
     -    with 0 as the len.
     +    many bytes will be read. find_commit_header and find_header can then both
     +    call find_header_mem.
      
     -    This cleans up duplicate logic, as the logic for finding header values
     -    is now all in one place.
     +    This reduces duplicate logic, as the logic for finding header values
     +    can now all live in one place.
      
          Signed-off-by: John Cai <johncai86@gmail.com>
      
     @@ builtin/receive-pack.c: static char *prepare_push_cert_nonce(const char *path, t
      +	size_t out_len;
      +	const char *val = find_header_mem(msg, len, key, &out_len);
      +
     -+	if (val == NULL)
     ++	if (!val)
      +		return NULL;
      +
      +	if (next_line)
     @@ commit.c: struct commit_list **commit_list_append(struct commit *commit,
       	const char *line = msg;
       
      -	while (line) {
     ++	/*
     ++	 * NEEDSWORK: Between line[0] and msg[len], there may not be a LF nor NUL
     ++	 * at all, and strchrnul() will scan beyond the range we were given
     ++	 * Make this operation safer and abide by the contract to only read up to len.
     ++	 */
      +	while (line && line < msg + len) {
       		const char *eol = strchrnul(line, '\n');
       


 builtin/receive-pack.c | 33 ++++++++++-----------------------
 commit.c               | 14 ++++++++++++--
 commit.h               |  5 +++++
 3 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 9f4a0b816cf..5c2732a0d07 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -581,32 +581,19 @@ static char *prepare_push_cert_nonce(const char *path, timestamp_t stamp)
 	return strbuf_detach(&buf, NULL);
 }
 
-/*
- * NEEDSWORK: reuse find_commit_header() from jk/commit-author-parsing
- * after dropping "_commit" from its name and possibly moving it out
- * of commit.c
- */
 static char *find_header(const char *msg, size_t len, const char *key,
 			 const char **next_line)
 {
-	int key_len = strlen(key);
-	const char *line = msg;
-
-	while (line && line < msg + len) {
-		const char *eol = strchrnul(line, '\n');
-
-		if ((msg + len <= eol) || line == eol)
-			return NULL;
-		if (line + key_len < eol &&
-		    !memcmp(line, key, key_len) && line[key_len] == ' ') {
-			int offset = key_len + 1;
-			if (next_line)
-				*next_line = *eol ? eol + 1 : eol;
-			return xmemdupz(line + offset, (eol - line) - offset);
-		}
-		line = *eol ? eol + 1 : NULL;
-	}
-	return NULL;
+	size_t out_len;
+	const char *val = find_header_mem(msg, len, key, &out_len);
+
+	if (!val)
+		return NULL;
+
+	if (next_line)
+		*next_line = val + out_len + 1;
+
+	return xmemdupz(val, out_len);
 }
 
 /*
diff --git a/commit.c b/commit.c
index a348f085b2b..5ece03e6373 100644
--- a/commit.c
+++ b/commit.c
@@ -1631,12 +1631,18 @@ struct commit_list **commit_list_append(struct commit *commit,
 	return &new_commit->next;
 }
 
-const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
+const char *find_header_mem(const char *msg, size_t len,
+			const char *key, size_t *out_len)
 {
 	int key_len = strlen(key);
 	const char *line = msg;
 
-	while (line) {
+	/*
+	 * NEEDSWORK: Between line[0] and msg[len], there may not be a LF nor NUL
+	 * at all, and strchrnul() will scan beyond the range we were given
+	 * Make this operation safer and abide by the contract to only read up to len.
+	 */
+	while (line && line < msg + len) {
 		const char *eol = strchrnul(line, '\n');
 
 		if (line == eol)
@@ -1653,6 +1659,10 @@ const char *find_commit_header(const char *msg, const char *key, size_t *out_len
 	return NULL;
 }
 
+const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
+{
+	return find_header_mem(msg, strlen(msg), key, out_len);
+}
 /*
  * Inspect the given string and determine the true "end" of the log message, in
  * order to find where to put a new Signed-off-by trailer.  Ignored are
diff --git a/commit.h b/commit.h
index 3ea32766bcb..38cc5426615 100644
--- a/commit.h
+++ b/commit.h
@@ -290,12 +290,17 @@ void free_commit_extra_headers(struct commit_extra_header *extra);
 
 /*
  * Search the commit object contents given by "msg" for the header "key".
+ * Reads up to "len" bytes of "msg".
  * Returns a pointer to the start of the header contents, or NULL. The length
  * of the header, up to the first newline, is returned via out_len.
  *
  * Note that some headers (like mergetag) may be multi-line. It is the caller's
  * responsibility to parse further in this case!
  */
+const char *find_header_mem(const char *msg, size_t len,
+			const char *key,
+			size_t *out_len);
+
 const char *find_commit_header(const char *msg, const char *key,
 			       size_t *out_len);
 

base-commit: c8b2ade48c204690119936ada89cd938c476c5c2
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v4] receive-pack.c: consolidate find header logic
  2022-01-05 15:21     ` [PATCH v4] " John Cai via GitGitGadget
@ 2022-01-05 20:10       ` Junio C Hamano
  2022-01-06  0:51       ` [PATCH v5] " John Cai via GitGitGadget
  1 sibling, 0 replies; 16+ messages in thread
From: Junio C Hamano @ 2022-01-05 20:10 UTC (permalink / raw)
  To: John Cai via GitGitGadget; +Cc: git, John Cai

"John Cai via GitGitGadget" <gitgitgadget@gmail.com> writes:

>     Changes since v4:
>     
>      * added NEEDSWORK block detailing what needs to be done to clean up
>        find_header_mem
> ...
>       -	while (line) {
>      ++	/*
>      ++	 * NEEDSWORK: Between line[0] and msg[len], there may not be a LF nor NUL
>      ++	 * at all, and strchrnul() will scan beyond the range we were given
>      ++	 * Make this operation safer and abide by the contract to only read up to len.
>      ++	 */

This sounds unnecessarily alarming.  Can't we also explain that the
current callers are safe?

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v5] receive-pack.c: consolidate find header logic
  2022-01-05 15:21     ` [PATCH v4] " John Cai via GitGitGadget
  2022-01-05 20:10       ` Junio C Hamano
@ 2022-01-06  0:51       ` John Cai via GitGitGadget
  2022-01-06 19:40         ` Junio C Hamano
  2022-01-06 20:07         ` [PATCH v6] " John Cai via GitGitGadget
  1 sibling, 2 replies; 16+ messages in thread
From: John Cai via GitGitGadget @ 2022-01-06  0:51 UTC (permalink / raw)
  To: git; +Cc: John Cai, John Cai

From: John Cai <johncai86@gmail.com>

There are two functions that have very similar logic of finding a header
value. find_commit_header, and find_header. We can conslidate the logic
by introducing a new function find_header_mem, which is equivalent to
find_commit_header except it takes a len parameter that determines how
many bytes will be read. find_commit_header and find_header can then both
call find_header_mem.

This reduces duplicate logic, as the logic for finding header values
can now all live in one place.

Signed-off-by: John Cai <johncai86@gmail.com>
---
    Consolidate find_header logic into one function
    
    This addresses the NEEDSWORK comment in builtin/receive-pack.c:
    
     /**
       * NEEDSWORK: reuse find_commit_header() from jk/commit-author-parsing
       * after dropping "_commit" from its name and possibly moving it out
       * of commit.c
       **/
    
    
    There are two functions that have very similar logic of finding a header
    value. find_commit_header, and find_header. We can conslidate the logic
    by introducing a new function find_header_mem, which is equivalent to
    find_commit_header except it takes a len parameter that determines how
    many bytes will be read. find_commit_header and find_header can then
    both call find_header_mem.
    
    This reduces duplicate logic, as the logic for finding header values can
    now all live in one place.
    
    Changes since v4:
    
     * adjust verbiage of NEEDSWORK comment block
    
    Changes since v3:
    
     * added NEEDSWORK block detailing what needs to be done to clean up
       find_header_mem
     * fixed verbiage in commit message
     * adjusted style of an if block (based on Junio's feedback)

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1125%2Fjohn-cai%2Fjc%2Freplace-find-header-v5
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1125/john-cai/jc/replace-find-header-v5
Pull-Request: https://github.com/git/git/pull/1125

Range-diff vs v4:

 1:  a7b00022b00 ! 1:  02da3136c43 receive-pack.c: consolidate find header logic
     @@ commit.c: struct commit_list **commit_list_append(struct commit *commit,
       
      -	while (line) {
      +	/*
     -+	 * NEEDSWORK: Between line[0] and msg[len], there may not be a LF nor NUL
     -+	 * at all, and strchrnul() will scan beyond the range we were given
     -+	 * Make this operation safer and abide by the contract to only read up to len.
     ++	 * NEEDSWORK: It's possible for strchrnul() to scan beyond the range
     ++	 * given by len. However, current callers are safe because they compute
     ++	 * len by scanning a NUL-terminated block of memory starting at msg.
     ++	 * Make this function safer by checking if the input is NUL-terminated
     ++	 * or has a NL between line[0] and msg[len].
      +	 */
      +	while (line && line < msg + len) {
       		const char *eol = strchrnul(line, '\n');


 builtin/receive-pack.c | 33 ++++++++++-----------------------
 commit.c               | 16 ++++++++++++++--
 commit.h               |  5 +++++
 3 files changed, 29 insertions(+), 25 deletions(-)

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 9f4a0b816cf..5c2732a0d07 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -581,32 +581,19 @@ static char *prepare_push_cert_nonce(const char *path, timestamp_t stamp)
 	return strbuf_detach(&buf, NULL);
 }
 
-/*
- * NEEDSWORK: reuse find_commit_header() from jk/commit-author-parsing
- * after dropping "_commit" from its name and possibly moving it out
- * of commit.c
- */
 static char *find_header(const char *msg, size_t len, const char *key,
 			 const char **next_line)
 {
-	int key_len = strlen(key);
-	const char *line = msg;
-
-	while (line && line < msg + len) {
-		const char *eol = strchrnul(line, '\n');
-
-		if ((msg + len <= eol) || line == eol)
-			return NULL;
-		if (line + key_len < eol &&
-		    !memcmp(line, key, key_len) && line[key_len] == ' ') {
-			int offset = key_len + 1;
-			if (next_line)
-				*next_line = *eol ? eol + 1 : eol;
-			return xmemdupz(line + offset, (eol - line) - offset);
-		}
-		line = *eol ? eol + 1 : NULL;
-	}
-	return NULL;
+	size_t out_len;
+	const char *val = find_header_mem(msg, len, key, &out_len);
+
+	if (!val)
+		return NULL;
+
+	if (next_line)
+		*next_line = val + out_len + 1;
+
+	return xmemdupz(val, out_len);
 }
 
 /*
diff --git a/commit.c b/commit.c
index a348f085b2b..f5224e65de2 100644
--- a/commit.c
+++ b/commit.c
@@ -1631,12 +1631,20 @@ struct commit_list **commit_list_append(struct commit *commit,
 	return &new_commit->next;
 }
 
-const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
+const char *find_header_mem(const char *msg, size_t len,
+			const char *key, size_t *out_len)
 {
 	int key_len = strlen(key);
 	const char *line = msg;
 
-	while (line) {
+	/*
+	 * NEEDSWORK: It's possible for strchrnul() to scan beyond the range
+	 * given by len. However, current callers are safe because they compute
+	 * len by scanning a NUL-terminated block of memory starting at msg.
+	 * Make this function safer by checking if the input is NUL-terminated
+	 * or has a NL between line[0] and msg[len].
+	 */
+	while (line && line < msg + len) {
 		const char *eol = strchrnul(line, '\n');
 
 		if (line == eol)
@@ -1653,6 +1661,10 @@ const char *find_commit_header(const char *msg, const char *key, size_t *out_len
 	return NULL;
 }
 
+const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
+{
+	return find_header_mem(msg, strlen(msg), key, out_len);
+}
 /*
  * Inspect the given string and determine the true "end" of the log message, in
  * order to find where to put a new Signed-off-by trailer.  Ignored are
diff --git a/commit.h b/commit.h
index 3ea32766bcb..38cc5426615 100644
--- a/commit.h
+++ b/commit.h
@@ -290,12 +290,17 @@ void free_commit_extra_headers(struct commit_extra_header *extra);
 
 /*
  * Search the commit object contents given by "msg" for the header "key".
+ * Reads up to "len" bytes of "msg".
  * Returns a pointer to the start of the header contents, or NULL. The length
  * of the header, up to the first newline, is returned via out_len.
  *
  * Note that some headers (like mergetag) may be multi-line. It is the caller's
  * responsibility to parse further in this case!
  */
+const char *find_header_mem(const char *msg, size_t len,
+			const char *key,
+			size_t *out_len);
+
 const char *find_commit_header(const char *msg, const char *key,
 			       size_t *out_len);
 

base-commit: c8b2ade48c204690119936ada89cd938c476c5c2
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v5] receive-pack.c: consolidate find header logic
  2022-01-06  0:51       ` [PATCH v5] " John Cai via GitGitGadget
@ 2022-01-06 19:40         ` Junio C Hamano
  2022-01-06 20:07         ` [PATCH v6] " John Cai via GitGitGadget
  1 sibling, 0 replies; 16+ messages in thread
From: Junio C Hamano @ 2022-01-06 19:40 UTC (permalink / raw)
  To: John Cai via GitGitGadget; +Cc: git, John Cai

"John Cai via GitGitGadget" <gitgitgadget@gmail.com> writes:

>      ++	 * NEEDSWORK: It's possible for strchrnul() to scan beyond the range
>      ++	 * given by len. However, current callers are safe because they compute
>      ++	 * len by scanning a NUL-terminated block of memory starting at msg.

OK.  Because the caller didn't die with SIGBUS owhen they scanned
the memory to figure out len, we know scanning between msg[0] and
msg[len] is safe.

>      ++	 * Make this function safer by checking if the input is NUL-terminated
>      ++	 * or has a NL between line[0] and msg[len].

This comment however feels wrong.  The function is _safe_ if the
input is NUL-terminated already.  And it is very expected that the
msg[] has LF between msg[0] and msg[len]---after all that is why we
use strchrnul() to scan it.

Perhaps replace it with something like "But nevertheless it would be
better to make sure the function does not look at msg beyond len
that was provided by the caller", perhaps?

The most problematic case happens when msg points into somewhere
inside a page and &msg[len] is still inside that page, but (1) there
is no LF nor NUL in the page after &msg[0], and (2) the page after
the page &msg[0] and &msg[len] are in is not mapped.  Letting
strchrnul() scan msg[] beyond len will lead us to attempt to read
the first byte of the next page and killed with SIGBUS (I recall
that I had to debug a mysterious breakage in strlen or strchr on
SunOS, which tried to grab 8-byte at a time and crossed page
boundary to get an unnecessary SIGBUS).

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v6] receive-pack.c: consolidate find header logic
  2022-01-06  0:51       ` [PATCH v5] " John Cai via GitGitGadget
  2022-01-06 19:40         ` Junio C Hamano
@ 2022-01-06 20:07         ` John Cai via GitGitGadget
  2022-01-08  4:54           ` John Cai
  2022-01-08  7:11           ` Junio C Hamano
  1 sibling, 2 replies; 16+ messages in thread
From: John Cai via GitGitGadget @ 2022-01-06 20:07 UTC (permalink / raw)
  To: git; +Cc: John Cai, John Cai

From: John Cai <johncai86@gmail.com>

There are two functions that have very similar logic of finding a header
value. find_commit_header, and find_header. We can conslidate the logic
by introducing a new function find_header_mem, which is equivalent to
find_commit_header except it takes a len parameter that determines how
many bytes will be read. find_commit_header and find_header can then both
call find_header_mem.

This reduces duplicate logic, as the logic for finding header values
can now all live in one place.

Signed-off-by: John Cai <johncai86@gmail.com>
---
    Consolidate find_header logic into one function
    
    This addresses the NEEDSWORK comment in builtin/receive-pack.c:
    
     /**
       * NEEDSWORK: reuse find_commit_header() from jk/commit-author-parsing
       * after dropping "_commit" from its name and possibly moving it out
       * of commit.c
       **/
    
    
    There are two functions that have very similar logic of finding a header
    value. find_commit_header, and find_header. We can conslidate the logic
    by introducing a new function find_header_mem, which is equivalent to
    find_commit_header except it takes a len parameter that determines how
    many bytes will be read. find_commit_header and find_header can then
    both call find_header_mem.
    
    This reduces duplicate logic, as the logic for finding header values can
    now all live in one place.
    
    Changes since v4:
    
     * adjust verbiage of NEEDSWORK comment block
    
    Changes since v3:
    
     * added NEEDSWORK block detailing what needs to be done to clean up
       find_header_mem
     * fixed verbiage in commit message
     * adjusted style of an if block (based on Junio's feedback)

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1125%2Fjohn-cai%2Fjc%2Freplace-find-header-v6
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1125/john-cai/jc/replace-find-header-v6
Pull-Request: https://github.com/git/git/pull/1125

Range-diff vs v5:

 1:  02da3136c43 ! 1:  2b01d92b951 receive-pack.c: consolidate find header logic
     @@ commit.c: struct commit_list **commit_list_append(struct commit *commit,
      +	 * NEEDSWORK: It's possible for strchrnul() to scan beyond the range
      +	 * given by len. However, current callers are safe because they compute
      +	 * len by scanning a NUL-terminated block of memory starting at msg.
     -+	 * Make this function safer by checking if the input is NUL-terminated
     -+	 * or has a NL between line[0] and msg[len].
     ++	 * Nonetheless, it would be better to ensure the function does not look
     ++	 * at msg beyond the len provided by the caller.
      +	 */
      +	while (line && line < msg + len) {
       		const char *eol = strchrnul(line, '\n');


 builtin/receive-pack.c | 33 ++++++++++-----------------------
 commit.c               | 16 ++++++++++++++--
 commit.h               |  5 +++++
 3 files changed, 29 insertions(+), 25 deletions(-)

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 9f4a0b816cf..5c2732a0d07 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -581,32 +581,19 @@ static char *prepare_push_cert_nonce(const char *path, timestamp_t stamp)
 	return strbuf_detach(&buf, NULL);
 }
 
-/*
- * NEEDSWORK: reuse find_commit_header() from jk/commit-author-parsing
- * after dropping "_commit" from its name and possibly moving it out
- * of commit.c
- */
 static char *find_header(const char *msg, size_t len, const char *key,
 			 const char **next_line)
 {
-	int key_len = strlen(key);
-	const char *line = msg;
-
-	while (line && line < msg + len) {
-		const char *eol = strchrnul(line, '\n');
-
-		if ((msg + len <= eol) || line == eol)
-			return NULL;
-		if (line + key_len < eol &&
-		    !memcmp(line, key, key_len) && line[key_len] == ' ') {
-			int offset = key_len + 1;
-			if (next_line)
-				*next_line = *eol ? eol + 1 : eol;
-			return xmemdupz(line + offset, (eol - line) - offset);
-		}
-		line = *eol ? eol + 1 : NULL;
-	}
-	return NULL;
+	size_t out_len;
+	const char *val = find_header_mem(msg, len, key, &out_len);
+
+	if (!val)
+		return NULL;
+
+	if (next_line)
+		*next_line = val + out_len + 1;
+
+	return xmemdupz(val, out_len);
 }
 
 /*
diff --git a/commit.c b/commit.c
index a348f085b2b..28391c3468d 100644
--- a/commit.c
+++ b/commit.c
@@ -1631,12 +1631,20 @@ struct commit_list **commit_list_append(struct commit *commit,
 	return &new_commit->next;
 }
 
-const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
+const char *find_header_mem(const char *msg, size_t len,
+			const char *key, size_t *out_len)
 {
 	int key_len = strlen(key);
 	const char *line = msg;
 
-	while (line) {
+	/*
+	 * NEEDSWORK: It's possible for strchrnul() to scan beyond the range
+	 * given by len. However, current callers are safe because they compute
+	 * len by scanning a NUL-terminated block of memory starting at msg.
+	 * Nonetheless, it would be better to ensure the function does not look
+	 * at msg beyond the len provided by the caller.
+	 */
+	while (line && line < msg + len) {
 		const char *eol = strchrnul(line, '\n');
 
 		if (line == eol)
@@ -1653,6 +1661,10 @@ const char *find_commit_header(const char *msg, const char *key, size_t *out_len
 	return NULL;
 }
 
+const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
+{
+	return find_header_mem(msg, strlen(msg), key, out_len);
+}
 /*
  * Inspect the given string and determine the true "end" of the log message, in
  * order to find where to put a new Signed-off-by trailer.  Ignored are
diff --git a/commit.h b/commit.h
index 3ea32766bcb..38cc5426615 100644
--- a/commit.h
+++ b/commit.h
@@ -290,12 +290,17 @@ void free_commit_extra_headers(struct commit_extra_header *extra);
 
 /*
  * Search the commit object contents given by "msg" for the header "key".
+ * Reads up to "len" bytes of "msg".
  * Returns a pointer to the start of the header contents, or NULL. The length
  * of the header, up to the first newline, is returned via out_len.
  *
  * Note that some headers (like mergetag) may be multi-line. It is the caller's
  * responsibility to parse further in this case!
  */
+const char *find_header_mem(const char *msg, size_t len,
+			const char *key,
+			size_t *out_len);
+
 const char *find_commit_header(const char *msg, const char *key,
 			       size_t *out_len);
 

base-commit: c8b2ade48c204690119936ada89cd938c476c5c2
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v6] receive-pack.c: consolidate find header logic
  2022-01-06 20:07         ` [PATCH v6] " John Cai via GitGitGadget
@ 2022-01-08  4:54           ` John Cai
  2022-01-08  7:11           ` Junio C Hamano
  1 sibling, 0 replies; 16+ messages in thread
From: John Cai @ 2022-01-08  4:54 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

Hi Junio,


On 6 Jan 2022, at 15:07, John Cai via GitGitGadget wrote:

> From: John Cai <johncai86@gmail.com>
>
> There are two functions that have very similar logic of finding a header
> value. find_commit_header, and find_header. We can conslidate the logic
> by introducing a new function find_header_mem, which is equivalent to
> find_commit_header except it takes a len parameter that determines how
> many bytes will be read. find_commit_header and find_header can then both
> call find_header_mem.
>
> This reduces duplicate logic, as the logic for finding header values
> can now all live in one place.
>
> Signed-off-by: John Cai <johncai86@gmail.com>
> ---
>     Consolidate find_header logic into one function
>
>     This addresses the NEEDSWORK comment in builtin/receive-pack.c:
>
>      /**
>        * NEEDSWORK: reuse find_commit_header() from jk/commit-author-parsing
>        * after dropping "_commit" from its name and possibly moving it out
>        * of commit.c
>        **/
>
>
>     There are two functions that have very similar logic of finding a header
>     value. find_commit_header, and find_header. We can conslidate the logic
>     by introducing a new function find_header_mem, which is equivalent to
>     find_commit_header except it takes a len parameter that determines how
>     many bytes will be read. find_commit_header and find_header can then
>     both call find_header_mem.
>
>     This reduces duplicate logic, as the logic for finding header values can
>     now all live in one place.
>
>     Changes since v4:
>
>      * adjust verbiage of NEEDSWORK comment block
>
>     Changes since v3:
>
>      * added NEEDSWORK block detailing what needs to be done to clean up
>        find_header_mem
>      * fixed verbiage in commit message
>      * adjusted style of an if block (based on Junio's feedback)
>
> Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1125%2Fjohn-cai%2Fjc%2Freplace-find-header-v6
> Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1125/john-cai/jc/replace-find-header-v6
> Pull-Request: https://github.com/git/git/pull/1125
>
> Range-diff vs v5:
>
>  1:  02da3136c43 ! 1:  2b01d92b951 receive-pack.c: consolidate find header logic
>      @@ commit.c: struct commit_list **commit_list_append(struct commit *commit,
>       +	 * NEEDSWORK: It's possible for strchrnul() to scan beyond the range
>       +	 * given by len. However, current callers are safe because they compute
>       +	 * len by scanning a NUL-terminated block of memory starting at msg.
>      -+	 * Make this function safer by checking if the input is NUL-terminated
>      -+	 * or has a NL between line[0] and msg[len].
>      ++	 * Nonetheless, it would be better to ensure the function does not look
>      ++	 * at msg beyond the len provided by the caller.
>       +	 */
>       +	while (line && line < msg + len) {
>        		const char *eol = strchrnul(line, '\n');
>
>
>  builtin/receive-pack.c | 33 ++++++++++-----------------------
>  commit.c               | 16 ++++++++++++++--
>  commit.h               |  5 +++++
>  3 files changed, 29 insertions(+), 25 deletions(-)
>
> diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
> index 9f4a0b816cf..5c2732a0d07 100644
> --- a/builtin/receive-pack.c
> +++ b/builtin/receive-pack.c
> @@ -581,32 +581,19 @@ static char *prepare_push_cert_nonce(const char *path, timestamp_t stamp)
>  	return strbuf_detach(&buf, NULL);
>  }
>
> -/*
> - * NEEDSWORK: reuse find_commit_header() from jk/commit-author-parsing
> - * after dropping "_commit" from its name and possibly moving it out
> - * of commit.c
> - */
>  static char *find_header(const char *msg, size_t len, const char *key,
>  			 const char **next_line)
>  {
> -	int key_len = strlen(key);
> -	const char *line = msg;
> -
> -	while (line && line < msg + len) {
> -		const char *eol = strchrnul(line, '\n');
> -
> -		if ((msg + len <= eol) || line == eol)
> -			return NULL;
> -		if (line + key_len < eol &&
> -		    !memcmp(line, key, key_len) && line[key_len] == ' ') {
> -			int offset = key_len + 1;
> -			if (next_line)
> -				*next_line = *eol ? eol + 1 : eol;
> -			return xmemdupz(line + offset, (eol - line) - offset);
> -		}
> -		line = *eol ? eol + 1 : NULL;
> -	}
> -	return NULL;
> +	size_t out_len;
> +	const char *val = find_header_mem(msg, len, key, &out_len);
> +
> +	if (!val)
> +		return NULL;
> +
> +	if (next_line)
> +		*next_line = val + out_len + 1;
> +
> +	return xmemdupz(val, out_len);
>  }
>
>  /*
> diff --git a/commit.c b/commit.c
> index a348f085b2b..28391c3468d 100644
> --- a/commit.c
> +++ b/commit.c
> @@ -1631,12 +1631,20 @@ struct commit_list **commit_list_append(struct commit *commit,
>  	return &new_commit->next;
>  }
>
> -const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
> +const char *find_header_mem(const char *msg, size_t len,
> +			const char *key, size_t *out_len)
>  {
>  	int key_len = strlen(key);
>  	const char *line = msg;
>
> -	while (line) {
> +	/*
> +	 * NEEDSWORK: It's possible for strchrnul() to scan beyond the range
> +	 * given by len. However, current callers are safe because they compute
> +	 * len by scanning a NUL-terminated block of memory starting at msg.
> +	 * Nonetheless, it would be better to ensure the function does not look
> +	 * at msg beyond the len provided by the caller.
> +	 */

Let me know if this verbiage makes sense.

> +	while (line && line < msg + len) {
>  		const char *eol = strchrnul(line, '\n');
>
>  		if (line == eol)
> @@ -1653,6 +1661,10 @@ const char *find_commit_header(const char *msg, const char *key, size_t *out_len
>  	return NULL;
>  }
>
> +const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
> +{
> +	return find_header_mem(msg, strlen(msg), key, out_len);
> +}
>  /*
>   * Inspect the given string and determine the true "end" of the log message, in
>   * order to find where to put a new Signed-off-by trailer.  Ignored are
> diff --git a/commit.h b/commit.h
> index 3ea32766bcb..38cc5426615 100644
> --- a/commit.h
> +++ b/commit.h
> @@ -290,12 +290,17 @@ void free_commit_extra_headers(struct commit_extra_header *extra);
>
>  /*
>   * Search the commit object contents given by "msg" for the header "key".
> + * Reads up to "len" bytes of "msg".
>   * Returns a pointer to the start of the header contents, or NULL. The length
>   * of the header, up to the first newline, is returned via out_len.
>   *
>   * Note that some headers (like mergetag) may be multi-line. It is the caller's
>   * responsibility to parse further in this case!
>   */
> +const char *find_header_mem(const char *msg, size_t len,
> +			const char *key,
> +			size_t *out_len);
> +
>  const char *find_commit_header(const char *msg, const char *key,
>  			       size_t *out_len);
>
>
> base-commit: c8b2ade48c204690119936ada89cd938c476c5c2
> -- 
> gitgitgadget

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v6] receive-pack.c: consolidate find header logic
  2022-01-06 20:07         ` [PATCH v6] " John Cai via GitGitGadget
  2022-01-08  4:54           ` John Cai
@ 2022-01-08  7:11           ` Junio C Hamano
  1 sibling, 0 replies; 16+ messages in thread
From: Junio C Hamano @ 2022-01-08  7:11 UTC (permalink / raw)
  To: John Cai via GitGitGadget; +Cc: git, John Cai

"John Cai via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: John Cai <johncai86@gmail.com>
>
> There are two functions that have very similar logic of finding a header
> value. find_commit_header, and find_header. We can conslidate the logic
> by introducing a new function find_header_mem, which is equivalent to
> find_commit_header except it takes a len parameter that determines how
> many bytes will be read. find_commit_header and find_header can then both
> call find_header_mem.
>
> This reduces duplicate logic, as the logic for finding header values
> can now all live in one place.
>
> Signed-off-by: John Cai <johncai86@gmail.com>
> ---
>     Consolidate find_header logic into one function
> +const char *find_header_mem(const char *msg, size_t len,
> +			const char *key, size_t *out_len)
>  {
>  	int key_len = strlen(key);
>  	const char *line = msg;
>  
> -	while (line) {
> +	/*
> +	 * NEEDSWORK: It's possible for strchrnul() to scan beyond the range
> +	 * given by len. However, current callers are safe because they compute
> +	 * len by scanning a NUL-terminated block of memory starting at msg.
> +	 * Nonetheless, it would be better to ensure the function does not look
> +	 * at msg beyond the len provided by the caller.
> +	 */
> +	while (line && line < msg + len) {
>  		const char *eol = strchrnul(line, '\n');

Thanks; queued.

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2022-01-08  7:11 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-27 18:26 [PATCH 0/2] Consolidate find_header logic into one function John Cai via GitGitGadget
2021-12-27 18:26 ` [PATCH 1/2] receive-pack.c: consolidate find header logic John Cai via GitGitGadget
2021-12-27 22:33   ` Junio C Hamano
2021-12-27 18:26 ` [PATCH 2/2] commit.c: rename find_commit_header to find_header John Cai via GitGitGadget
2021-12-29  6:19 ` [PATCH v2] receive-pack.c: consolidate find header logic John Cai via GitGitGadget
2021-12-30 23:01   ` Junio C Hamano
2021-12-31  6:17   ` [PATCH v3] " John Cai via GitGitGadget
2022-01-04  1:56     ` Junio C Hamano
2022-01-04 15:12       ` John Cai
2022-01-05 15:21     ` [PATCH v4] " John Cai via GitGitGadget
2022-01-05 20:10       ` Junio C Hamano
2022-01-06  0:51       ` [PATCH v5] " John Cai via GitGitGadget
2022-01-06 19:40         ` Junio C Hamano
2022-01-06 20:07         ` [PATCH v6] " John Cai via GitGitGadget
2022-01-08  4:54           ` John Cai
2022-01-08  7:11           ` Junio C Hamano

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).