All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jonathan Tan <jonathantanmy@google.com>
To: git@vger.kernel.org
Cc: Jonathan Tan <jonathantanmy@google.com>,
	peff@peff.net, gitster@pobox.com
Subject: [PATCH v2 2/3] object-file: emit corruption errors when detected
Date: Tue,  6 Dec 2022 16:40:52 -0800	[thread overview]
Message-ID: <9ddfff3585c293c9801570e395b514505796a43f.1670373420.git.jonathantanmy@google.com> (raw)
In-Reply-To: <cover.1670373420.git.jonathantanmy@google.com>

Instead of relying on errno being preserved across function calls, teach
do_oid_object_info_extended() to itself report object corruption when
it first detects it. There are 3 types of corruption being detected:
 - when a replacement object is missing
 - when a loose object is corrupt
 - when a packed object is corrupt and the object cannot be read
   in another way

Note that in the RHS of this patch's diff, a check for ENOENT that was
introduced in 3ba7a06552 (A loose object is not corrupt if it cannot
be read due to EMFILE, 2010-10-28) is also removed. The purpose of this
check is to avoid a false report of corruption if the errno contains
something like EMFILE (or anything that is not ENOENT), in which case
a more generic report is presented. Because, as of this patch, we no
longer rely on such a heuristic to determine corruption, but surface
the error message at the point when we read something that we did not
expect, this check is no longer necessary.

Besides being more resilient, this also prepares for a future patch in
which an indirect caller of do_oid_object_info_extended() will need
such functionality.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 object-file.c  | 63 ++++++++++++++++++++++++++------------------------
 object-store.h |  3 +++
 2 files changed, 36 insertions(+), 30 deletions(-)

diff --git a/object-file.c b/object-file.c
index 596dd049fd..c7a513d123 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1215,7 +1215,8 @@ static int quick_has_loose(struct repository *r,
  * searching for a loose object named "oid".
  */
 static void *map_loose_object_1(struct repository *r, const char *path,
-			     const struct object_id *oid, unsigned long *size)
+				const struct object_id *oid, unsigned long *size,
+				char **mapped_path)
 {
 	void *map;
 	int fd;
@@ -1224,6 +1225,9 @@ static void *map_loose_object_1(struct repository *r, const char *path,
 		fd = git_open(path);
 	else
 		fd = open_loose_object(r, oid, &path);
+	if (mapped_path)
+		*mapped_path = xstrdup(path);
+
 	map = NULL;
 	if (fd >= 0) {
 		struct stat st;
@@ -1247,7 +1251,7 @@ void *map_loose_object(struct repository *r,
 		       const struct object_id *oid,
 		       unsigned long *size)
 {
-	return map_loose_object_1(r, NULL, oid, size);
+	return map_loose_object_1(r, NULL, oid, size, NULL);
 }
 
 enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
@@ -1428,6 +1432,7 @@ static int loose_object_info(struct repository *r,
 {
 	int status = 0;
 	unsigned long mapsize;
+	char *mapped_path = NULL;
 	void *map;
 	git_zstream stream;
 	char hdr[MAX_HEADER_LEN];
@@ -1459,9 +1464,11 @@ static int loose_object_info(struct repository *r,
 		return 0;
 	}
 
-	map = map_loose_object(r, oid, &mapsize);
-	if (!map)
+	map = map_loose_object_1(r, NULL, oid, &mapsize, &mapped_path);
+	if (!map) {
+		free(mapped_path);
 		return -1;
+	}
 
 	if (!oi->sizep)
 		oi->sizep = &size_scratch;
@@ -1497,8 +1504,13 @@ static int loose_object_info(struct repository *r,
 		break;
 	}
 
+	if (status && (flags & OBJECT_INFO_DIE_IF_CORRUPT))
+		die(_("loose object %s (stored in %s) is corrupt"),
+		    oid_to_hex(oid), mapped_path);
+
 	git_inflate_end(&stream);
 cleanup:
+	free(mapped_path);
 	munmap(map, mapsize);
 	if (oi->sizep == &size_scratch)
 		oi->sizep = NULL;
@@ -1608,6 +1620,15 @@ static int do_oid_object_info_extended(struct repository *r,
 			continue;
 		}
 
+		if (flags & OBJECT_INFO_DIE_IF_CORRUPT) {
+			const struct packed_git *p;
+			if ((flags & OBJECT_INFO_LOOKUP_REPLACE) && !oideq(real, oid))
+				die(_("replacement %s not found for %s"),
+				    oid_to_hex(real), oid_to_hex(oid));
+			if ((p = has_packed_and_bad(r, real)))
+				die(_("packed object %s (stored in %s) is corrupt"),
+				    oid_to_hex(real), p->pack_name);
+		}
 		return -1;
 	}
 
@@ -1660,7 +1681,8 @@ int oid_object_info(struct repository *r,
 
 static void *read_object(struct repository *r,
 			 const struct object_id *oid, enum object_type *type,
-			 unsigned long *size)
+			 unsigned long *size,
+			 int die_if_corrupt)
 {
 	struct object_info oi = OBJECT_INFO_INIT;
 	void *content;
@@ -1668,7 +1690,9 @@ static void *read_object(struct repository *r,
 	oi.sizep = size;
 	oi.contentp = &content;
 
-	if (oid_object_info_extended(r, oid, &oi, 0) < 0)
+	if (oid_object_info_extended(r, oid, &oi,
+				     die_if_corrupt ? OBJECT_INFO_DIE_IF_CORRUPT : 0)
+	    < 0)
 		return NULL;
 	return content;
 }
@@ -1704,35 +1728,14 @@ void *read_object_file_extended(struct repository *r,
 				int lookup_replace)
 {
 	void *data;
-	const struct packed_git *p;
-	const char *path;
-	struct stat st;
 	const struct object_id *repl = lookup_replace ?
 		lookup_replace_object(r, oid) : oid;
 
 	errno = 0;
-	data = read_object(r, repl, type, size);
+	data = read_object(r, repl, type, size, 1);
 	if (data)
 		return data;
 
-	obj_read_lock();
-	if (errno && errno != ENOENT)
-		die_errno(_("failed to read object %s"), oid_to_hex(oid));
-
-	/* die if we replaced an object with one that does not exist */
-	if (repl != oid)
-		die(_("replacement %s not found for %s"),
-		    oid_to_hex(repl), oid_to_hex(oid));
-
-	if (!stat_loose_object(r, repl, &st, &path))
-		die(_("loose object %s (stored in %s) is corrupt"),
-		    oid_to_hex(repl), path);
-
-	if ((p = has_packed_and_bad(r, repl)))
-		die(_("packed object %s (stored in %s) is corrupt"),
-		    oid_to_hex(repl), p->pack_name);
-	obj_read_unlock();
-
 	return NULL;
 }
 
@@ -2275,7 +2278,7 @@ int force_object_loose(const struct object_id *oid, time_t mtime)
 
 	if (has_loose_object(oid))
 		return 0;
-	buf = read_object(the_repository, oid, &type, &len);
+	buf = read_object(the_repository, oid, &type, &len, 0);
 	if (!buf)
 		return error(_("cannot read object for %s"), oid_to_hex(oid));
 	hdrlen = format_object_header(hdr, sizeof(hdr), type, len);
@@ -2797,7 +2800,7 @@ int read_loose_object(const char *path,
 	char hdr[MAX_HEADER_LEN];
 	unsigned long *size = oi->sizep;
 
-	map = map_loose_object_1(the_repository, path, NULL, &mapsize);
+	map = map_loose_object_1(the_repository, path, NULL, &mapsize, NULL);
 	if (!map) {
 		error_errno(_("unable to mmap %s"), path);
 		goto out;
diff --git a/object-store.h b/object-store.h
index 1be57abaf1..01134ab5ec 100644
--- a/object-store.h
+++ b/object-store.h
@@ -447,6 +447,9 @@ struct object_info {
  */
 #define OBJECT_INFO_FOR_PREFETCH (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK)
 
+/* Die if object corruption (not just an object being missing) was detected. */
+#define OBJECT_INFO_DIE_IF_CORRUPT 64
+
 int oid_object_info_extended(struct repository *r,
 			     const struct object_id *,
 			     struct object_info *, unsigned flags);
-- 
2.39.0.rc0.267.gcb52ba06e7-goog


  parent reply	other threads:[~2022-12-07  0:41 UTC|newest]

Thread overview: 85+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-30 20:30 [PATCH 0/4] Don't lazy-fetch commits when parsing them Jonathan Tan
2022-11-30 20:30 ` [PATCH 1/4] object-file: reread object with exact same args Jonathan Tan
2022-11-30 20:30 ` [PATCH 2/4] object-file: refactor corrupt object diagnosis Jonathan Tan
2022-11-30 20:47   ` Jeff King
2022-11-30 23:42     ` Junio C Hamano
2022-12-01 19:06       ` Jonathan Tan
2022-11-30 20:30 ` [PATCH 3/4] object-file: refactor replace object lookup Jonathan Tan
2022-11-30 20:54   ` Jeff King
2022-11-30 20:30 ` [PATCH 4/4] commit: don't lazy-fetch commits Jonathan Tan
2022-11-30 21:04   ` Jeff King
2022-12-01 19:11     ` Jonathan Tan
2022-12-01 19:33       ` Jeff King
2022-11-30 23:56   ` Junio C Hamano
2022-11-30 21:06 ` [PATCH 0/4] Don't lazy-fetch commits when parsing them Jeff King
2022-12-01 19:27 ` [PATCH v2 " Jonathan Tan
2022-12-01 19:27   ` [PATCH v2 1/4] object-file: reread object with exact same args Jonathan Tan
2022-12-01 19:27   ` [PATCH v2 2/4] object-file: refactor corrupt object diagnosis Jonathan Tan
2022-12-01 19:27   ` [PATCH v2 3/4] object-file: refactor replace object lookup Jonathan Tan
2022-12-01 19:27   ` [PATCH v2 4/4] commit: don't lazy-fetch commits Jonathan Tan
2022-12-01 19:54   ` [PATCH v2 0/4] Don't lazy-fetch commits when parsing them Jeff King
2022-12-01 21:26     ` Jonathan Tan
2022-12-02  0:23       ` Jeff King
2022-12-06  0:49         ` Jonathan Tan
2022-12-06  2:03           ` Jeff King
2022-12-01 23:09     ` Junio C Hamano
2022-12-07  0:40 ` [PATCH v2 0/3] " Jonathan Tan
2022-12-07  0:40   ` [PATCH v2 1/3] object-file: don't exit early if skipping loose Jonathan Tan
2022-12-07  1:12     ` Junio C Hamano
2022-12-07  6:14       ` Jeff King
2022-12-07  6:43         ` Junio C Hamano
2022-12-07 23:20           ` Jonathan Tan
2022-12-07  0:40   ` Jonathan Tan [this message]
2022-12-07  1:16     ` [PATCH v2 2/3] object-file: emit corruption errors when detected Junio C Hamano
2022-12-07  4:05     ` Ævar Arnfjörð Bjarmason
2022-12-07  7:07       ` Jeff King
2022-12-07 10:33         ` Ævar Arnfjörð Bjarmason
2022-12-07 23:26           ` Jonathan Tan
2022-12-07 23:50             ` Ævar Arnfjörð Bjarmason
2022-12-08  6:33               ` Jeff King
2022-12-07  6:42     ` Jeff King
2022-12-07  0:40   ` [PATCH v2 3/3] commit: don't lazy-fetch commits Jonathan Tan
2022-12-07  1:17     ` Junio C Hamano
2022-12-07  6:47     ` Jeff King
2022-12-08 20:57 ` [PATCH v3 0/4] Don't lazy-fetch commits when parsing them Jonathan Tan
2022-12-08 20:57   ` [PATCH v3 1/4] object-file: remove OBJECT_INFO_IGNORE_LOOSE Jonathan Tan
2022-12-08 20:57   ` [PATCH v3 2/4] object-file: refactor map_loose_object_1() Jonathan Tan
2022-12-09  2:00     ` Jeff King
2022-12-09 18:17       ` Jonathan Tan
2022-12-09 20:27         ` Jeff King
2022-12-09 20:27           ` Jeff King
2022-12-08 20:57   ` [PATCH v3 3/4] object-file: emit corruption errors when detected Jonathan Tan
2022-12-09  1:56     ` Jeff King
2022-12-09 18:26       ` Jonathan Tan
2022-12-09 14:19     ` Ævar Arnfjörð Bjarmason
2022-12-09 18:33       ` Jonathan Tan
2022-12-08 20:57   ` [PATCH v3 4/4] commit: don't lazy-fetch commits Jonathan Tan
2022-12-09 14:14     ` Ævar Arnfjörð Bjarmason
2022-12-09 21:44 ` [PATCH v4 0/4] Don't lazy-fetch commits when parsing them Jonathan Tan
2022-12-09 21:44   ` [PATCH v4 1/4] object-file: remove OBJECT_INFO_IGNORE_LOOSE Jonathan Tan
2022-12-09 21:44   ` [PATCH v4 2/4] object-file: refactor map_loose_object_1() Jonathan Tan
2022-12-09 21:44   ` [PATCH v4 3/4] object-file: emit corruption errors when detected Jonathan Tan
2022-12-10  0:16     ` Junio C Hamano
2022-12-12 20:38       ` Jonathan Tan
2022-12-12 20:49       ` Jeff King
2022-12-12 20:59         ` Jonathan Tan
2022-12-12 21:20           ` Jeff King
2022-12-12 21:29             ` Jonathan Tan
2022-12-12 22:17               ` Jeff King
2022-12-12 22:52             ` Jonathan Tan
2022-12-13 10:37               ` Jeff King
2022-12-09 21:44   ` [PATCH v4 4/4] commit: don't lazy-fetch commits Jonathan Tan
2022-12-12 22:48 ` [PATCH v5 0/4] Don't lazy-fetch commits when parsing them Jonathan Tan
2022-12-12 22:48   ` [PATCH v5 1/4] object-file: remove OBJECT_INFO_IGNORE_LOOSE Jonathan Tan
2022-12-12 22:48   ` [PATCH v5 2/4] object-file: refactor map_loose_object_1() Jonathan Tan
2022-12-12 22:48   ` [PATCH v5 3/4] object-file: emit corruption errors when detected Jonathan Tan
2022-12-13  1:51     ` Junio C Hamano
2022-12-13 10:38       ` Jeff King
2022-12-12 22:48   ` [PATCH v5 4/4] commit: don't lazy-fetch commits Jonathan Tan
2022-12-14 19:17 ` [PATCH v6 0/4] Don't lazy-fetch commits when parsing them Jonathan Tan
2022-12-14 19:17   ` [PATCH v6 1/4] object-file: remove OBJECT_INFO_IGNORE_LOOSE Jonathan Tan
2022-12-14 19:17   ` [PATCH v6 2/4] object-file: refactor map_loose_object_1() Jonathan Tan
2022-12-14 19:17   ` [PATCH v6 3/4] object-file: emit corruption errors when detected Jonathan Tan
2022-12-14 19:17   ` [PATCH v6 4/4] commit: don't lazy-fetch commits Jonathan Tan
2022-12-14 20:43   ` [PATCH v6 0/4] Don't lazy-fetch commits when parsing them Jeff King
2022-12-15  0:07     ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9ddfff3585c293c9801570e395b514505796a43f.1670373420.git.jonathantanmy@google.com \
    --to=jonathantanmy@google.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.