All of lore.kernel.org
 help / color / mirror / Atom feed
From: "René Scharfe" <l.s.r@web.de>
To: Jonathan Nieder <jrnieder@gmail.com>,
	David Turner <dturner@twosigma.com>
Cc: git@vger.kernel.org, Jeff King <peff@peff.net>,
	Duy Nguyen <pclouds@gmail.com>
Subject: Re: [PATCH v3 1/2] use HOST_NAME_MAX to size buffers for gethostname(2)
Date: Wed, 19 Apr 2017 19:28:29 +0200	[thread overview]
Message-ID: <c0333c81-d3b2-ca2d-a553-75642d8fb949@web.de> (raw)
In-Reply-To: <20170419012824.GA28740@aiede.svl.corp.google.com>

Am 19.04.2017 um 03:28 schrieb Jonathan Nieder:
> David Turner wrote:
>> @@ -274,7 +278,7 @@ static const char *lock_repo_for_gc(int force, pid_t* ret_pid)
>>   			 * running.
>>   			 */
>>   			time(NULL) - st.st_mtime <= 12 * 3600 &&
>> -			fscanf(fp, "%"SCNuMAX" %127c", &pid, locking_host) == 2 &&
>> +			fscanf(fp, scan_fmt, &pid, locking_host) == 2 &&
> 
> I hoped this could be simplified since HOST_NAME_MAX is a numeric literal,
> using the double-expansion trick:
> 
> #define STR_(s) # s
> #define STR(s) STR_(s)
> 
> 			fscanf(fp, "%" SCNuMAX " %" STR(HOST_NAME_MAX) "c",
> 			       &pid, locking_host);
> 
> Unfortunately, I don't think there's anything stopping a platform from
> defining
> 
> 	#define HOST_NAME_MAX 0x100
> 
> which would break that.
> 
> So this run-time calculation appears to be necessary.

I had another look at this last night and cooked up the following
patch.  Might have gone overboard with it..

-- >8 --
Subject: [PATCH] gc: support arbitrary hostnames and pids in lock_repo_for_gc()

git gc writes its pid and hostname into a pidfile to prevent concurrent
garbage collection.  Repositories may be shared between systems with
different limits for host name length and different pid ranges.  Use a
strbuf to store the file contents to allow for arbitrarily long
hostnames and pids to be shown to the user on early abort.

Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
 builtin/gc.c | 151 +++++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 111 insertions(+), 40 deletions(-)

diff --git a/builtin/gc.c b/builtin/gc.c
index 2daede7820..4c1c01e87d 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -228,21 +228,99 @@ static int need_to_gc(void)
 	return 1;
 }
 
+struct pidfile {
+	struct strbuf buf;
+	char *hostname;
+};
+
+#define PIDFILE_INIT { STRBUF_INIT }
+
+static void pidfile_release(struct pidfile *pf)
+{
+	pf->hostname = NULL;
+	strbuf_release(&pf->buf);
+}
+
+static int pidfile_read(struct pidfile *pf, const char *path,
+			unsigned int max_age_seconds)
+{
+	int fd;
+	struct stat st;
+	ssize_t len;
+	char *space;
+	int rc = -1;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return rc;
+
+	if (fstat(fd, &st))
+		goto out;
+	if (time(NULL) - st.st_mtime > max_age_seconds)
+		goto out;
+	if (st.st_size > (size_t)st.st_size)
+		goto out;
+
+	len = strbuf_read(&pf->buf, fd, st.st_size);
+	if (len < 0)
+		goto out;
+
+	space = strchr(pf->buf.buf, ' ');
+	if (!space) {
+		pidfile_release(pf);
+		goto out;
+	}
+	pf->hostname = space + 1;
+	*space = '\0';
+
+	rc = 0;
+out:
+	close(fd);
+	return rc;
+}
+
+static int parse_pid(const char *value, pid_t *ret)
+{
+	if (value && *value) {
+		char *end;
+		intmax_t val;
+
+		errno = 0;
+		val = strtoimax(value, &end, 0);
+		if (errno == ERANGE)
+			return 0;
+		if (*end)
+			return 0;
+		if (labs(val) > maximum_signed_value_of_type(pid_t)) {
+			errno = ERANGE;
+			return 0;
+		}
+		*ret = val;
+		return 1;
+	}
+	errno = EINVAL;
+	return 0;
+}
+
+static int pidfile_process_exists(const struct pidfile *pf)
+{
+	pid_t pid;
+	return parse_pid(pf->buf.buf, &pid) &&
+		(!kill(pid, 0) || errno == EPERM);
+}
+
 /* return NULL on success, else hostname running the gc */
-static const char *lock_repo_for_gc(int force, pid_t* ret_pid)
+static int lock_repo_for_gc(int force, struct pidfile *pf)
 {
 	static struct lock_file lock;
 	char my_host[128];
 	struct strbuf sb = STRBUF_INIT;
-	struct stat st;
-	uintmax_t pid;
-	FILE *fp;
 	int fd;
 	char *pidfile_path;
 
 	if (is_tempfile_active(&pidfile))
 		/* already locked */
-		return NULL;
+		return 0;
 
 	if (gethostname(my_host, sizeof(my_host)))
 		xsnprintf(my_host, sizeof(my_host), "unknown");
@@ -251,34 +329,27 @@ static const char *lock_repo_for_gc(int force, pid_t* ret_pid)
 	fd = hold_lock_file_for_update(&lock, pidfile_path,
 				       LOCK_DIE_ON_ERROR);
 	if (!force) {
-		static char locking_host[128];
-		int should_exit;
-		fp = fopen(pidfile_path, "r");
-		memset(locking_host, 0, sizeof(locking_host));
-		should_exit =
-			fp != NULL &&
-			!fstat(fileno(fp), &st) &&
-			/*
-			 * 12 hour limit is very generous as gc should
-			 * never take that long. On the other hand we
-			 * don't really need a strict limit here,
-			 * running gc --auto one day late is not a big
-			 * problem. --force can be used in manual gc
-			 * after the user verifies that no gc is
-			 * running.
-			 */
-			time(NULL) - st.st_mtime <= 12 * 3600 &&
-			fscanf(fp, "%"SCNuMAX" %127c", &pid, locking_host) == 2 &&
+		/*
+		 * 12 hour limit is very generous as gc should
+		 * never take that long. On the other hand we
+		 * don't really need a strict limit here,
+		 * running gc --auto one day late is not a big
+		 * problem. --force can be used in manual gc
+		 * after the user verifies that no gc is
+		 * running.
+		 */
+		const unsigned max_age_seconds = 12 * 3600;
+
+		if (!pidfile_read(pf, pidfile_path, max_age_seconds)) {
 			/* be gentle to concurrent "gc" on remote hosts */
-			(strcmp(locking_host, my_host) || !kill(pid, 0) || errno == EPERM);
-		if (fp != NULL)
-			fclose(fp);
-		if (should_exit) {
-			if (fd >= 0)
-				rollback_lock_file(&lock);
-			*ret_pid = pid;
-			free(pidfile_path);
-			return locking_host;
+			if (strcmp(pf->hostname, my_host) ||
+			    pidfile_process_exists(pf)) {
+				if (fd >= 0)
+					rollback_lock_file(&lock);
+				free(pidfile_path);
+				return -1;
+			}
+			pidfile_release(pf);
 		}
 	}
 
@@ -289,7 +360,7 @@ static const char *lock_repo_for_gc(int force, pid_t* ret_pid)
 	commit_lock_file(&lock);
 	register_tempfile(&pidfile, pidfile_path);
 	free(pidfile_path);
-	return NULL;
+	return 0;
 }
 
 static int report_last_gc_error(void)
@@ -344,8 +415,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
 	int auto_gc = 0;
 	int quiet = 0;
 	int force = 0;
-	const char *name;
-	pid_t pid;
+	struct pidfile pf = PIDFILE_INIT;
 	int daemonized = 0;
 
 	struct option builtin_gc_options[] = {
@@ -420,12 +490,13 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
 	} else
 		add_repack_all_option();
 
-	name = lock_repo_for_gc(force, &pid);
-	if (name) {
-		if (auto_gc)
+	if (lock_repo_for_gc(force, &pf)) {
+		if (auto_gc) {
+			pidfile_release(&pf);
 			return 0; /* be quiet on --auto */
-		die(_("gc is already running on machine '%s' pid %"PRIuMAX" (use --force if not)"),
-		    name, (uintmax_t)pid);
+		}
+		die(_("gc is already running on machine '%s' pid %s (use --force if not)"),
+		    pf.hostname, pf.buf.buf);
 	}
 
 	if (daemonized) {
-- 
2.12.2


  parent reply	other threads:[~2017-04-19 17:28 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-04-18 21:57 [PATCH v3 0/2] gethostbyname fixes David Turner
2017-04-18 21:57 ` [PATCH v3 1/2] use HOST_NAME_MAX to size buffers for gethostname(2) David Turner
2017-04-19  1:28   ` Jonathan Nieder
2017-04-19  2:57     ` Junio C Hamano
2017-04-19 14:03     ` René Scharfe
2017-04-19 17:28     ` René Scharfe [this message]
2017-04-19 19:08       ` David Turner
2017-04-19 19:09       ` Torsten Bögershausen
2017-04-19 20:02         ` René Scharfe
2017-04-20 18:37           ` Torsten Bögershausen
2017-04-20 19:28             ` René Scharfe
2017-04-21  4:18               ` Torsten Bögershausen
2017-04-18 21:57 ` [PATCH v3 2/2] xgethostname: handle long hostnames David Turner
2017-04-19  1:35   ` Jonathan Nieder
2017-04-19  2:51     ` Junio C Hamano
2017-04-19 15:50       ` David Turner
2017-04-19 16:43         ` René Scharfe
2017-04-19  2:48   ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c0333c81-d3b2-ca2d-a553-75642d8fb949@web.de \
    --to=l.s.r@web.de \
    --cc=dturner@twosigma.com \
    --cc=git@vger.kernel.org \
    --cc=jrnieder@gmail.com \
    --cc=pclouds@gmail.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.