All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Victoria Dye via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: rsbecker@nexbridge.com, gitster@pobox.com,
	Victoria Dye <vdye@github.com>, Victoria Dye <vdye@github.com>
Subject: [PATCH] diagnose.c: refactor to safely use 'd_type'
Date: Sat, 17 Sep 2022 18:16:55 +0000	[thread overview]
Message-ID: <pull.1354.git.1663438615413.gitgitgadget@gmail.com> (raw)

From: Victoria Dye <vdye@github.com>

Refactor usage of the 'd_type' property of 'struct dirent' in 'diagnose.c'
to instead utilize the compatibility macro 'DTYPE()'. On systems where
'd_type' is not present in 'struct dirent', this macro will always return
'DT_UNKNOWN'. In that case, instead fall back on using the 'stat.st_mode' to
determine whether the dirent points to a dir, file, or link.

Additionally, add a test to 't0092-diagnose.sh' to verify that files (e.g.,
loose objects) are counted properly.

Note that the new function 'get_dtype()' is based on 'resolve_dtype()' in
'dir.c' (which itself was refactored from a prior 'get_dtype()' in
ad6f2157f9 (dir: restructure in a way to avoid passing around a struct
dirent, 2020-01-16)), but differs in that it is meant for use on arbitrary
files, such as those inside the '.git' dir. Because of this, it does not
search the index for a matching entry to derive the 'd_type'.

Reported-by: Randall S. Becker <rsbecker@nexbridge.com>
Signed-off-by: Victoria Dye <vdye@github.com>
---
    diagnose.c: refactor to safely use 'd_type'
    
    This fixes the compilation error reported by Randall in [1], which
    happens because 'd_type' doesn't exist in the 'dirent' struct on all
    platforms.
    
    The implementation of new-'get_dtype()' shares some code with
    'resolve_dtype()' (and its previous incarnations as old-'get_dtype()')
    but, to keep this fix down to a single patch, I didn't refactor the bits
    of shared logic into a common location. I'm happy to do that if it would
    be a valuable addition to this fix, but I wanted to start small and
    avoid the churn of a refactor if it isn't something people want here.
    
    Thanks!
    
     * Victoria
    
    [1]
    https://lore.kernel.org/git/011001d8ca20$bc4d81f0$34e885d0$@nexbridge.com/

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1354%2Fvdye%2Fbugfix%2Ffix-dtype-usage-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1354/vdye/bugfix/fix-dtype-usage-v1
Pull-Request: https://github.com/gitgitgadget/git/pull/1354

 diagnose.c          | 70 ++++++++++++++++++++++++++++++++++++---------
 t/t0092-diagnose.sh | 12 ++++++++
 2 files changed, 68 insertions(+), 14 deletions(-)

diff --git a/diagnose.c b/diagnose.c
index beb0a8741ba..8f265698966 100644
--- a/diagnose.c
+++ b/diagnose.c
@@ -66,17 +66,53 @@ static int dir_file_stats(struct object_directory *object_dir, void *data)
 	return 0;
 }
 
-static int count_files(char *path)
+/*
+ * Get the d_type of a dirent. If the d_type is unknown, derive it from
+ * stat.st_mode.
+ *
+ * Note that 'path' is assumed to have a trailing slash. It is also modified
+ * in-place during the execution of the function, but is then reverted to its
+ * original value before returning.
+ */
+static unsigned char get_dtype(struct dirent *e, struct strbuf *path)
 {
-	DIR *dir = opendir(path);
+	struct stat st;
+	unsigned char dtype = DTYPE(e);
+	size_t base_path_len;
+
+	if (dtype != DT_UNKNOWN)
+		return dtype;
+
+	/* d_type unknown in dirent, try to fall back on lstat results */
+	base_path_len = path->len;
+	strbuf_addstr(path, e->d_name);
+	if (lstat(path->buf, &st))
+		goto cleanup;
+
+	/* determine d_type from st_mode */
+	if (S_ISREG(st.st_mode))
+		dtype = DT_REG;
+	else if (S_ISDIR(st.st_mode))
+		dtype = DT_DIR;
+	else if (S_ISLNK(st.st_mode))
+		dtype = DT_LNK;
+
+cleanup:
+	strbuf_setlen(path, base_path_len);
+	return dtype;
+}
+
+static int count_files(struct strbuf *path)
+{
+	DIR *dir = opendir(path->buf);
 	struct dirent *e;
 	int count = 0;
 
 	if (!dir)
 		return 0;
 
-	while ((e = readdir(dir)) != NULL)
-		if (!is_dot_or_dotdot(e->d_name) && e->d_type == DT_REG)
+	while ((e = readdir_skip_dot_and_dotdot(dir)) != NULL)
+		if (get_dtype(e, path) == DT_REG)
 			count++;
 
 	closedir(dir);
@@ -104,13 +140,13 @@ static void loose_objs_stats(struct strbuf *buf, const char *path)
 	strbuf_addch(&count_path, '/');
 	base_path_len = count_path.len;
 
-	while ((e = readdir(dir)) != NULL)
-		if (!is_dot_or_dotdot(e->d_name) &&
-		    e->d_type == DT_DIR && strlen(e->d_name) == 2 &&
+	while ((e = readdir_skip_dot_and_dotdot(dir)) != NULL)
+		if (get_dtype(e, &count_path) == DT_DIR &&
+		    strlen(e->d_name) == 2 &&
 		    !hex_to_bytes(&c, e->d_name, 1)) {
 			strbuf_setlen(&count_path, base_path_len);
-			strbuf_addstr(&count_path, e->d_name);
-			total += (count = count_files(count_path.buf));
+			strbuf_addf(&count_path, "%s/", e->d_name);
+			total += (count = count_files(&count_path));
 			strbuf_addf(buf, "%s : %7d files\n", e->d_name, count);
 		}
 
@@ -144,22 +180,28 @@ static int add_directory_to_archiver(struct strvec *archiver_args,
 	len = buf.len;
 	strvec_pushf(archiver_args, "--prefix=%s", buf.buf);
 
-	while (!res && (e = readdir(dir))) {
-		if (!strcmp(".", e->d_name) || !strcmp("..", e->d_name))
-			continue;
+	while (!res && (e = readdir_skip_dot_and_dotdot(dir))) {
+		struct strbuf abspath = STRBUF_INIT;
+		unsigned char dtype;
+
+		strbuf_add_absolute_path(&abspath, at_root ? "." : path);
+		strbuf_addch(&abspath, '/');
+		dtype = get_dtype(e, &abspath);
 
 		strbuf_setlen(&buf, len);
 		strbuf_addstr(&buf, e->d_name);
 
-		if (e->d_type == DT_REG)
+		if (dtype == DT_REG)
 			strvec_pushf(archiver_args, "--add-file=%s", buf.buf);
-		else if (e->d_type != DT_DIR)
+		else if (dtype != DT_DIR)
 			warning(_("skipping '%s', which is neither file nor "
 				  "directory"), buf.buf);
 		else if (recurse &&
 			 add_directory_to_archiver(archiver_args,
 						   buf.buf, recurse) < 0)
 			res = -1;
+
+		strbuf_release(&abspath);
 	}
 
 	closedir(dir);
diff --git a/t/t0092-diagnose.sh b/t/t0092-diagnose.sh
index fca9b58489c..133e5747d61 100755
--- a/t/t0092-diagnose.sh
+++ b/t/t0092-diagnose.sh
@@ -28,12 +28,23 @@ test_expect_success UNZIP 'creates diagnostics zip archive' '
 	! "$GIT_UNZIP" -l "$zip_path" | grep ".git/"
 '
 
+test_expect_success UNZIP 'counts loose objects' '
+	test_commit A &&
+
+	# After committing, should have non-zero loose objects
+	git diagnose -o test-count -s 1 >out &&
+	zip_path=test-count/git-diagnostics-1.zip &&
+	"$GIT_UNZIP" -p "$zip_path" objects-local.txt >out &&
+	grep "^Total: [1-9][0-9]* loose objects" out
+'
+
 test_expect_success UNZIP '--mode=stats excludes .git dir contents' '
 	test_when_finished rm -rf report &&
 
 	git diagnose -o report -s test --mode=stats >out &&
 
 	# Includes pack quantity/size info
+	zip_path=report/git-diagnostics-test.zip &&
 	"$GIT_UNZIP" -p "$zip_path" packs-local.txt >out &&
 	grep ".git/objects" out &&
 
@@ -47,6 +58,7 @@ test_expect_success UNZIP '--mode=all includes .git dir contents' '
 	git diagnose -o report -s test --mode=all >out &&
 
 	# Includes pack quantity/size info
+	zip_path=report/git-diagnostics-test.zip &&
 	"$GIT_UNZIP" -p "$zip_path" packs-local.txt >out &&
 	grep ".git/objects" out &&
 

base-commit: d3fa443f97e3a8d75b51341e2d5bac380b7422df
-- 
gitgitgadget

             reply	other threads:[~2022-09-17 18:17 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-17 18:16 Victoria Dye via GitGitGadget [this message]
2022-09-19  1:13 ` [PATCH] diagnose.c: refactor to safely use 'd_type' Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=pull.1354.git.1663438615413.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=rsbecker@nexbridge.com \
    --cc=vdye@github.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.