All of lore.kernel.org
 help / color / mirror / Atom feed
From: Luke Shumaker <lukeshu@lukeshu.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>,
	"Elijah Newren" <newren@gmail.com>, "Jeff King" <peff@peff.net>,
	"Johannes Schindelin" <Johannes.Schindelin@gmx.de>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>,
	"Taylor Blau" <me@ttaylorr.com>,
	"brian m . carlson" <sandals@crustytoothpaste.net>,
	"Eric Sunshine" <sunshine@sunshineco.com>,
	"Luke Shumaker" <lukeshu@datawire.io>
Subject: [PATCH v4 4/5] fast-export: do not modify memory from get_commit_buffer
Date: Fri, 30 Apr 2021 17:25:36 -0600	[thread overview]
Message-ID: <20210430232537.1131641-5-lukeshu@lukeshu.com> (raw)
In-Reply-To: <20210430232537.1131641-1-lukeshu@lukeshu.com>

From: Luke Shumaker <lukeshu@datawire.io>

fast-export's helper function find_encoding() takes a `const char *`, but
modifies that memory despite the `const`.  Ultimately, this memory came
from get_commit_buffer(), and you're not supposed to modify the memory
that you get from get_commit_buffer().

So, get rid of find_encoding() in favor of commit.h:find_commit_header(),
which gives back a string length, rather than mutating the memory to
insert a '\0' terminator.

Because find_commit_header() detects the "\n\n" string that separates the
headers and the commit message, move the call to be above the
`message = strstr(..., "\n\n")` call.  This helps readability, and allows
for the value of `encoding` to be used for a better value of "..." so that
the same memory doesn't need to be checked twice.  Introduce a
`commit_buffer_cursor` variable to avoid writing an awkward
`encoding ? encoding + encoding_len : committer_end` expression.

Signed-off-by: Luke Shumaker <lukeshu@datawire.io>
---

Notes:
    v4: This commit is new in v4.

 builtin/fast-export.c | 65 ++++++++++++++++++++++++-------------------
 1 file changed, 37 insertions(+), 28 deletions(-)

diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index d1cb8a3183..81f3fb1f05 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -499,21 +499,6 @@ static void show_filemodify(struct diff_queue_struct *q,
 	}
 }
 
-static const char *find_encoding(const char *begin, const char *end)
-{
-	const char *needle = "\nencoding ";
-	char *bol, *eol;
-
-	bol = memmem(begin, end ? end - begin : strlen(begin),
-		     needle, strlen(needle));
-	if (!bol)
-		return NULL;
-	bol += strlen(needle);
-	eol = strchrnul(bol, '\n');
-	*eol = '\0';
-	return bol;
-}
-
 static char *anonymize_ref_component(void *data)
 {
 	static int counter;
@@ -615,13 +600,26 @@ static void anonymize_ident_line(const char **beg, const char **end)
 	*end = out->buf + out->len;
 }
 
+static char *reencode_message(const char *in_msg,
+			      const char *in_encoding, size_t in_encoding_len)
+{
+	static struct strbuf in_encoding_buf = STRBUF_INIT;
+
+	strbuf_reset(&in_encoding_buf);
+	strbuf_add(&in_encoding_buf, in_encoding, in_encoding_len);
+
+	return reencode_string(in_msg, "UTF-8", in_encoding_buf.buf);
+}
+
 static void handle_commit(struct commit *commit, struct rev_info *rev,
 			  struct string_list *paths_of_changed_objects)
 {
 	int saved_output_format = rev->diffopt.output_format;
-	const char *commit_buffer;
+	const char *commit_buffer, *commit_buffer_cursor;
 	const char *author, *author_end, *committer, *committer_end;
-	const char *encoding, *message;
+	const char *encoding;
+	size_t encoding_len;
+	const char *message;
 	char *reencoded = NULL;
 	struct commit_list *p;
 	const char *refname;
@@ -630,21 +628,31 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
 	rev->diffopt.output_format = DIFF_FORMAT_CALLBACK;
 
 	parse_commit_or_die(commit);
-	commit_buffer = get_commit_buffer(commit, NULL);
-	author = strstr(commit_buffer, "\nauthor ");
+	commit_buffer_cursor = commit_buffer = get_commit_buffer(commit, NULL);
+
+	author = strstr(commit_buffer_cursor, "\nauthor ");
 	if (!author)
 		die("could not find author in commit %s",
 		    oid_to_hex(&commit->object.oid));
 	author++;
-	author_end = strchrnul(author, '\n');
-	committer = strstr(author_end, "\ncommitter ");
+	commit_buffer_cursor = author_end = strchrnul(author, '\n');
+
+	committer = strstr(commit_buffer_cursor, "\ncommitter ");
 	if (!committer)
 		die("could not find committer in commit %s",
 		    oid_to_hex(&commit->object.oid));
 	committer++;
-	committer_end = strchrnul(committer, '\n');
-	message = strstr(committer_end, "\n\n");
-	encoding = find_encoding(committer_end, message);
+	commit_buffer_cursor = committer_end = strchrnul(committer, '\n');
+
+	/* find_commit_header() gets a `+ 1` because
+	 * commit_buffer_cursor points at the trailing "\n" at the end
+	 * of the previous line, but find_commit_header() wants a
+	 * pointer to the beginning of the next line. */
+	encoding = find_commit_header(commit_buffer_cursor + 1, "encoding", &encoding_len);
+	if (encoding)
+		commit_buffer_cursor = encoding + encoding_len;
+
+	message = strstr(commit_buffer_cursor, "\n\n");
 	if (message)
 		message += 2;
 
@@ -685,14 +693,15 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
 	} else if (encoding) {
 		switch(reencode_mode) {
 		case REENCODE_YES:
-			reencoded = reencode_string(message, "UTF-8", encoding);
+			reencoded = reencode_message(message, encoding, encoding_len);
 			break;
 		case REENCODE_NO:
 			break;
 		case REENCODE_ABORT:
-			die("Encountered commit-specific encoding %s in commit "
+			die("Encountered commit-specific encoding %.*s in commit "
 			    "%s; use --reencode=[yes|no] to handle it",
-			    encoding, oid_to_hex(&commit->object.oid));
+			    (int)encoding_len, encoding,
+			    oid_to_hex(&commit->object.oid));
 		}
 	}
 	if (!commit->parents)
@@ -704,7 +713,7 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
 	       (int)(author_end - author), author,
 	       (int)(committer_end - committer), committer);
 	if (!reencoded && encoding)
-		printf("encoding %s\n", encoding);
+		printf("encoding %.*s\n", (int)encoding_len, encoding);
 	printf("data %u\n%s",
 	       (unsigned)(reencoded
 			  ? strlen(reencoded) : message
-- 
2.31.1


  parent reply	other threads:[~2021-04-30 23:26 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-22  0:27 [PATCH v2 0/3] fast-export, fast-import: implement signed-commits Luke Shumaker
2021-04-22  0:27 ` [PATCH v2 1/3] git-fast-import.txt: add missing LF in the BNF Luke Shumaker
2021-04-22  0:27 ` [PATCH v2 2/3] fast-export: rename --signed-tags='warn' to 'warn-verbatim' Luke Shumaker
2021-04-22  3:59   ` Eric Sunshine
2021-04-22  4:43     ` Luke Shumaker
2021-04-22  4:50       ` Luke Shumaker
2021-04-22  0:27 ` [PATCH v2 3/3] fast-export, fast-import: implement signed-commits Luke Shumaker
2021-04-23 16:41 ` [PATCH v3 0/3] " Luke Shumaker
2021-04-23 16:41   ` [PATCH v3 1/3] git-fast-import.txt: add missing LF in the BNF Luke Shumaker
2021-04-23 16:41   ` [PATCH v3 2/3] fast-export: rename --signed-tags='warn' to 'warn-verbatim' Luke Shumaker
2021-04-28  3:29     ` Junio C Hamano
2021-04-29 19:02       ` Luke Shumaker
2021-04-30  0:03         ` Junio C Hamano
2021-04-23 16:41   ` [PATCH v3 3/3] fast-export, fast-import: implement signed-commits Luke Shumaker
2021-04-28  4:02     ` Junio C Hamano
2021-04-29 20:06       ` Luke Shumaker
2021-04-29 22:38         ` Elijah Newren
2021-04-29 23:42           ` Junio C Hamano
2021-04-30  2:23             ` Elijah Newren
2021-04-30  3:20               ` Junio C Hamano
2021-04-30 17:07             ` Luke Shumaker
2021-04-30 19:34       ` Luke Shumaker
2021-04-30 19:59         ` Elijah Newren
2021-04-30 22:21           ` Luke Shumaker
2021-04-30 23:25   ` [PATCH v4 0/5] fast-export, fast-import: add support for signed-commits Luke Shumaker
2021-04-30 23:25     ` [PATCH v4 1/5] git-fast-import.txt: add missing LF in the BNF Luke Shumaker
2021-04-30 23:25     ` [PATCH v4 2/5] fast-export: rename --signed-tags='warn' to 'warn-verbatim' Luke Shumaker
2021-04-30 23:25     ` [PATCH v4 3/5] git-fast-export.txt: clarify why 'verbatim' may not be a good idea Luke Shumaker
2021-04-30 23:25     ` Luke Shumaker [this message]
2021-05-03  4:41       ` [PATCH v4 4/5] fast-export: do not modify memory from get_commit_buffer Junio C Hamano
2021-04-30 23:25     ` [PATCH v4 5/5] fast-export, fast-import: add support for signed-commits Luke Shumaker
2021-05-03  5:09       ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210430232537.1131641-5-lukeshu@lukeshu.com \
    --to=lukeshu@lukeshu.com \
    --cc=Johannes.Schindelin@gmx.de \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=lukeshu@datawire.io \
    --cc=me@ttaylorr.com \
    --cc=newren@gmail.com \
    --cc=pclouds@gmail.com \
    --cc=peff@peff.net \
    --cc=sandals@crustytoothpaste.net \
    --cc=sunshine@sunshineco.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.