git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 3/3] git-daemon: rewrite kindergarden
  2008-08-13  8:43 [PATCH 1/3] git-daemon: logging done right Stephen R. van den Berg
@ 2008-08-13  8:43 ` Stephen R. van den Berg
  2008-08-13  8:58   ` Stephen R. van den Berg
                     ` (2 more replies)
  2008-08-13  8:43 ` [PATCH 2/3] git-daemon: make the signal handler almost a no-op Stephen R. van den Berg
  2008-08-13 23:13 ` [PATCH 1/3] git-daemon: logging done right Junio C Hamano
  2 siblings, 3 replies; 16+ messages in thread
From: Stephen R. van den Berg @ 2008-08-13  8:43 UTC (permalink / raw)
  To: git

Get rid of the silly fixed array of children and make
max-connections dynamic and configurable in the process.
Fix the killing code to actually be smart instead of the
pseudo-random mess.
Avoid forking if too busy already.

Signed-off-by: Stephen R. van den Berg <srb@cuci.nl>
---

 Documentation/git-daemon.txt |    9 +-
 daemon.c                     |  215 +++++++++++++++---------------------------
 2 files changed, 84 insertions(+), 140 deletions(-)

diff --git a/Documentation/git-daemon.txt b/Documentation/git-daemon.txt
index 4ba4b75..b08a08c 100644
--- a/Documentation/git-daemon.txt
+++ b/Documentation/git-daemon.txt
@@ -9,8 +9,9 @@ SYNOPSIS
 --------
 [verse]
 'git daemon' [--verbose] [--syslog] [--export-all]
-	     [--timeout=n] [--init-timeout=n] [--strict-paths]
-	     [--base-path=path] [--user-path | --user-path=path]
+	     [--timeout=n] [--init-timeout=n] [--max-connections=n]
+	     [--strict-paths] [--base-path=path] [--base-path-relaxed]
+	     [--user-path | --user-path=path]
 	     [--interpolated-path=pathtemplate]
 	     [--reuseaddr] [--detach] [--pid-file=file]
 	     [--enable=service] [--disable=service]
@@ -99,6 +100,10 @@ OPTIONS
 	it takes for the server to process the sub-request and time spent
 	waiting for next client's request.
 
+--max-connections::
+	Maximum number of concurrent clients, defaults to 32.  Set it to
+	zero for no limit.
+
 --syslog::
 	Log to syslog instead of stderr. Note that this option does not imply
 	--verbose, thus by default only error conditions will be logged.
diff --git a/daemon.c b/daemon.c
index 19d620c..a7a735c 100644
--- a/daemon.c
+++ b/daemon.c
@@ -19,8 +19,8 @@ static int reuseaddr;
 
 static const char daemon_usage[] =
 "git daemon [--verbose] [--syslog] [--export-all]\n"
-"           [--timeout=n] [--init-timeout=n] [--strict-paths]\n"
-"           [--base-path=path] [--base-path-relaxed]\n"
+"           [--timeout=n] [--init-timeout=n] [--max-connections=n]\n"
+"           [--strict-paths] [--base-path=path] [--base-path-relaxed]\n"
 "           [--user-path | --user-path=path]\n"
 "           [--interpolated-path=path]\n"
 "           [--reuseaddr] [--detach] [--pid-file=file]\n"
@@ -584,40 +584,37 @@ static int execute(struct sockaddr *addr)
 	return -1;
 }
 
+static int max_connections = 32;
 
-/*
- * We count spawned/reaped separately, just to avoid any
- * races when updating them from signals. The SIGCHLD handler
- * will only update children_reaped, and the fork logic will
- * only update children_spawned.
- *
- * MAX_CHILDREN should be a power-of-two to make the modulus
- * operation cheap. It should also be at least twice
- * the maximum number of connections we will ever allow.
- */
-#define MAX_CHILDREN 128
-
-static int max_connections = 25;
-
-/* These are updated by the signal handler */
-static volatile unsigned int children_reaped;
-static pid_t dead_child[MAX_CHILDREN];
-
-/* These are updated by the main loop */
-static unsigned int children_spawned;
-static unsigned int children_deleted;
+static unsigned int live_children;
 
 static struct child {
+	struct child*next;
 	pid_t pid;
-	int addrlen;
 	struct sockaddr_storage address;
-} live_child[MAX_CHILDREN];
+} *firstborn;
 
-static void add_child(int idx, pid_t pid, struct sockaddr *addr, int addrlen)
+static void add_child(pid_t pid, struct sockaddr *addr, int addrlen)
 {
-	live_child[idx].pid = pid;
-	live_child[idx].addrlen = addrlen;
-	memcpy(&live_child[idx].address, addr, addrlen);
+	struct child*newborn;
+	newborn = xcalloc(1, sizeof *newborn);
+	if (newborn) {
+		struct child **cradle, *blanket;
+
+		live_children++;
+		newborn->pid = pid;
+		memcpy(&newborn->address, addr, addrlen);
+		for (cradle = &firstborn;
+		     (blanket = *cradle);
+		     cradle = &blanket->next)
+			if (!memcmp(&blanket->address, &newborn->address,
+				   sizeof newborn->address))
+				break;
+		newborn->next = blanket;
+		*cradle = newborn;
+	}
+	else
+		logerror("Out of memory spawning new child");
 }
 
 /*
@@ -626,142 +623,78 @@ static void add_child(int idx, pid_t pid, struct sockaddr *addr, int addrlen)
  * We move everything up by one, since the new "deleted" will
  * be one higher.
  */
-static void remove_child(pid_t pid, unsigned deleted, unsigned spawned)
+static void remove_child(pid_t pid)
 {
-	struct child n;
+	struct child **cradle, *blanket;
 
-	deleted %= MAX_CHILDREN;
-	spawned %= MAX_CHILDREN;
-	if (live_child[deleted].pid == pid) {
-		live_child[deleted].pid = -1;
-		return;
-	}
-	n = live_child[deleted];
-	for (;;) {
-		struct child m;
-		deleted = (deleted + 1) % MAX_CHILDREN;
-		if (deleted == spawned)
-			die("could not find dead child %d\n", pid);
-		m = live_child[deleted];
-		live_child[deleted] = n;
-		if (m.pid == pid)
-			return;
-		n = m;
-	}
+	for (cradle = &firstborn; (blanket = *cradle); cradle = &blanket->next)
+		if (blanket->pid == pid) {
+			*cradle = blanket->next;
+			live_children--;
+			free(blanket);
+			break;
+		}
 }
 
 /*
  * This gets called if the number of connections grows
  * past "max_connections".
  *
- * We _should_ start off by searching for connections
- * from the same IP, and if there is some address wth
- * multiple connections, we should kill that first.
- *
- * As it is, we just "randomly" kill 25% of the connections,
- * and our pseudo-random generator sucks too. I have no
- * shame.
- *
- * Really, this is just a place-holder for a _real_ algorithm.
+ * We kill the newest connection from a duplicate IP.
  */
-static void kill_some_children(int signo, unsigned start, unsigned stop)
+static void kill_some_child()
 {
-	start %= MAX_CHILDREN;
-	stop %= MAX_CHILDREN;
-	while (start != stop) {
-		if (!(start & 3))
-			kill(live_child[start].pid, signo);
-		start = (start + 1) % MAX_CHILDREN;
-	}
-}
-
-static void check_dead_children(void)
-{
-	unsigned spawned, reaped, deleted;
-
-	for (;;) {
-	    int status;
-	    pid_t pid = waitpid(-1, &status, WNOHANG);
-	
-	    if (pid > 0) {
-	        unsigned reaped = children_reaped;
-	        if (!WIFEXITED(status) || WEXITSTATUS(status) > 0)
-	            pid = -pid;
-	        dead_child[reaped % MAX_CHILDREN] = pid;
-	        children_reaped = reaped + 1;
-	        continue;
-	    }
-	    break;
-	}
+	const struct child *blanket;
 
-	spawned = children_spawned;
-	reaped = children_reaped;
-	deleted = children_deleted;
+	if ((blanket = firstborn)) {
+		const struct child *next;
 
-	while (deleted < reaped) {
-		pid_t pid = dead_child[deleted % MAX_CHILDREN];
-		const char *dead = pid < 0 ? " (with error)" : "";
-
-		if (pid < 0)
-			pid = -pid;
-
-		/* XXX: Custom logging, since we don't wanna getpid() */
-		if (verbose) {
-			if (log_syslog)
-				syslog(LOG_INFO, "[%d] Disconnected%s",
-						pid, dead);
-			else
-				fprintf(stderr, "[%d] Disconnected%s\n",
-						pid, dead);
-		}
-		remove_child(pid, deleted, spawned);
-		deleted++;
+		for (; (next = blanket->next); blanket = next)
+			if (!memcmp(&blanket->address, &next->address,
+				   sizeof next->address)) {
+				kill(blanket->pid, SIGTERM);
+				break;
+			}
 	}
-	children_deleted = deleted;
 }
 
-static void check_max_connections(void)
+static void check_dead_children(void)
 {
-	for (;;) {
-		int active;
-		unsigned spawned, deleted;
-
-		check_dead_children();
-
-		spawned = children_spawned;
-		deleted = children_deleted;
-
-		active = spawned - deleted;
-		if (active <= max_connections)
-			break;
-
-		/* Kill some unstarted connections with SIGTERM */
-		kill_some_children(SIGTERM, deleted, spawned);
-		if (active <= max_connections << 1)
-			break;
+	int status;
+	pid_t pid;
 
-		/* If the SIGTERM thing isn't helping use SIGKILL */
-		kill_some_children(SIGKILL, deleted, spawned);
-		sleep(1);
+	while ((pid = waitpid(-1, &status, WNOHANG))>0) {
+		const char *dead = "";
+		remove_child(pid);
+		if (!WIFEXITED(status) || WEXITSTATUS(status) > 0)
+			dead = " (with error)";
+		loginfo("[%d] Disconnected%s", (int)pid, dead);
 	}
 }
 
 static void handle(int incoming, struct sockaddr *addr, int addrlen)
 {
-	pid_t pid = fork();
+	pid_t pid;
 
-	if (pid) {
-		unsigned idx;
+	if (max_connections && live_children >= max_connections) {
+		kill_some_child();
+		sleep(1);			 /* give it some time to die */
+		check_dead_children();
+		if (live_children >= max_connections) {
+			close(incoming);
+			logerror("Too many children, dropping connection");
+			return;
+		}
+	}
 
+	if ((pid = fork())) {
 		close(incoming);
-		if (pid < 0)
+		if (pid < 0) {
+			logerror("Couldn't fork %s", strerror(errno));
 			return;
+		}
 
-		idx = children_spawned % MAX_CHILDREN;
-		children_spawned++;
-		add_child(idx, pid, addr, addrlen);
-
-		check_max_connections();
+		add_child(pid, addr, addrlen);
 		return;
 	}
 
@@ -1081,6 +1014,12 @@ int main(int argc, char **argv)
 			init_timeout = atoi(arg+15);
 			continue;
 		}
+		if (!prefixcmp(arg, "--max-connections=")) {
+			max_connections = atoi(arg+18);
+			if (max_connections<=0)
+				max_connections=0;	        /* unlimited */
+			continue;
+		}
 		if (!strcmp(arg, "--strict-paths")) {
 			strict_paths = 1;
 			continue;

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 1/3] git-daemon: logging done right
@ 2008-08-13  8:43 Stephen R. van den Berg
  2008-08-13  8:43 ` [PATCH 3/3] git-daemon: rewrite kindergarden Stephen R. van den Berg
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Stephen R. van den Berg @ 2008-08-13  8:43 UTC (permalink / raw)
  To: git

Make git-daemon a proper syslogging citizen with PID-info.
Simplify the overzealous double buffering in the logroutine.
Call logerror() instead of error().

Signed-off-by: Stephen R. van den Berg <srb@cuci.nl>
---

 daemon.c |   49 +++++++++++++++++--------------------------------
 1 files changed, 17 insertions(+), 32 deletions(-)

diff --git a/daemon.c b/daemon.c
index 1c00305..774b2ce 100644
--- a/daemon.c
+++ b/daemon.c
@@ -78,38 +78,19 @@ static struct interp interp_table[] = {
 
 static void logreport(int priority, const char *err, va_list params)
 {
-	/* We should do a single write so that it is atomic and output
-	 * of several processes do not get intermingled. */
-	char buf[1024];
-	int buflen;
-	int maxlen, msglen;
-
-	/* sizeof(buf) should be big enough for "[pid] \n" */
-	buflen = snprintf(buf, sizeof(buf), "[%ld] ", (long) getpid());
-
-	maxlen = sizeof(buf) - buflen - 1; /* -1 for our own LF */
-	msglen = vsnprintf(buf + buflen, maxlen, err, params);
-
 	if (log_syslog) {
+		char buf[1024];
+		vsnprintf(buf, sizeof(buf), err, params);
 		syslog(priority, "%s", buf);
-		return;
 	}
-
-	/* maxlen counted our own LF but also counts space given to
-	 * vsnprintf for the terminating NUL.  We want to make sure that
-	 * we have space for our own LF and NUL after the "meat" of the
-	 * message, so truncate it at maxlen - 1.
-	 */
-	if (msglen > maxlen - 1)
-		msglen = maxlen - 1;
-	else if (msglen < 0)
-		msglen = 0; /* Protect against weird return values. */
-	buflen += msglen;
-
-	buf[buflen++] = '\n';
-	buf[buflen] = '\0';
-
-	write_in_full(2, buf, buflen);
+	else {
+		/* Since stderr is set to linebuffered mode, the
+		 * logging of different processes will not overlap
+		 */
+		fprintf(stderr, "[%d] ", (int)getpid());
+		vfprintf(stderr, err, params);
+		fputc('\n', stderr);
+	}
 }
 
 static void logerror(const char *err, ...)
@@ -836,7 +817,7 @@ static int socksetup(char *listen_addr, int listen_port, int **socklist_p)
 		if (sockfd < 0)
 			continue;
 		if (sockfd >= FD_SETSIZE) {
-			error("too large socket descriptor.");
+			logerror("too large socket descriptor.");
 			close(sockfd);
 			continue;
 		}
@@ -1178,9 +1159,11 @@ int main(int argc, char **argv)
 	}
 
 	if (log_syslog) {
-		openlog("git-daemon", 0, LOG_DAEMON);
+		openlog("git-daemon", LOG_PID, LOG_DAEMON);
 		set_die_routine(daemon_die);
 	}
+	else			    /* so that logging into a file is atomic */
+		setlinebuf(stderr);
 
 	if (inetd_mode && (group_name || user_name))
 		die("--user and --group are incompatible with --inetd");
@@ -1233,8 +1216,10 @@ int main(int argc, char **argv)
 		return execute(peer);
 	}
 
-	if (detach)
+	if (detach) {
 		daemonize();
+		loginfo("Ready to rumble");
+	}
 	else
 		sanitize_stdfds();
 

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/3] git-daemon: make the signal handler almost a no-op
  2008-08-13  8:43 [PATCH 1/3] git-daemon: logging done right Stephen R. van den Berg
  2008-08-13  8:43 ` [PATCH 3/3] git-daemon: rewrite kindergarden Stephen R. van den Berg
@ 2008-08-13  8:43 ` Stephen R. van den Berg
  2008-08-14  0:09   ` Junio C Hamano
  2008-08-14 13:41   ` Johannes Schindelin
  2008-08-13 23:13 ` [PATCH 1/3] git-daemon: logging done right Junio C Hamano
  2 siblings, 2 replies; 16+ messages in thread
From: Stephen R. van den Berg @ 2008-08-13  8:43 UTC (permalink / raw)
  To: git

by exploiting the fact that systemcalls get interrupted by signals;
and even they aren't, all zombies will be collected before the next
accept().
Fix another error() -> logerror() call.

Signed-off-by: Stephen R. van den Berg <srb@cuci.nl>
---

 daemon.c |   67 +++++++++++++++++++++++++++++---------------------------------
 1 files changed, 31 insertions(+), 36 deletions(-)

diff --git a/daemon.c b/daemon.c
index 774b2ce..19d620c 100644
--- a/daemon.c
+++ b/daemon.c
@@ -16,7 +16,6 @@
 static int log_syslog;
 static int verbose;
 static int reuseaddr;
-static int child_handler_pipe[2];
 
 static const char daemon_usage[] =
 "git daemon [--verbose] [--syslog] [--export-all]\n"
@@ -680,6 +679,21 @@ static void check_dead_children(void)
 {
 	unsigned spawned, reaped, deleted;
 
+	for (;;) {
+	    int status;
+	    pid_t pid = waitpid(-1, &status, WNOHANG);
+	
+	    if (pid > 0) {
+	        unsigned reaped = children_reaped;
+	        if (!WIFEXITED(status) || WEXITSTATUS(status) > 0)
+	            pid = -pid;
+	        dead_child[reaped % MAX_CHILDREN] = pid;
+	        children_reaped = reaped + 1;
+	        continue;
+	    }
+	    break;
+	}
+
 	spawned = children_spawned;
 	reaped = children_reaped;
 	deleted = children_deleted;
@@ -760,21 +774,10 @@ static void handle(int incoming, struct sockaddr *addr, int addrlen)
 
 static void child_handler(int signo)
 {
-	for (;;) {
-		int status;
-		pid_t pid = waitpid(-1, &status, WNOHANG);
-
-		if (pid > 0) {
-			unsigned reaped = children_reaped;
-			if (!WIFEXITED(status) || WEXITSTATUS(status) > 0)
-				pid = -pid;
-			dead_child[reaped % MAX_CHILDREN] = pid;
-			children_reaped = reaped + 1;
-			write(child_handler_pipe[1], &status, 1);
-			continue;
-		}
-		break;
-	}
+	/* Otherwise empty handler because systemcalls will get interrupted
+	 * upon signal receipt
+	 * SysV needs the handler to be reinstated
+	 */
 	signal(SIGCHLD, child_handler);
 }
 
@@ -917,35 +920,30 @@ static int service_loop(int socknum, int *socklist)
 	struct pollfd *pfd;
 	int i;
 
-	if (pipe(child_handler_pipe) < 0)
-		die ("Could not set up pipe for child handler");
-
-	pfd = xcalloc(socknum + 1, sizeof(struct pollfd));
+	pfd = xcalloc(socknum, sizeof(struct pollfd));
 
 	for (i = 0; i < socknum; i++) {
 		pfd[i].fd = socklist[i];
 		pfd[i].events = POLLIN;
 	}
-	pfd[socknum].fd = child_handler_pipe[0];
-	pfd[socknum].events = POLLIN;
-
-	child_handler(0);
 
 	for (;;) {
 		int i;
+		int olderrno;
 
-		if (poll(pfd, socknum + 1, -1) < 0) {
-			if (errno != EINTR) {
-				error("poll failed, resuming: %s",
-				      strerror(errno));
+		i = poll(pfd, socknum, -1);
+		olderrno = errno;
+
+		check_dead_children();
+
+		if (i < 0) {
+			if (olderrno != EINTR) {
+				logerror("poll failed, resuming: %s",
+				      strerror(olderrno));
 				sleep(1);
 			}
 			continue;
 		}
-		if (pfd[socknum].revents & POLLIN) {
-			read(child_handler_pipe[0], &i, 1);
-			check_dead_children();
-		}
 
 		for (i = 0; i < socknum; i++) {
 			if (pfd[i].revents & POLLIN) {
@@ -1036,10 +1034,7 @@ int main(int argc, char **argv)
 	gid_t gid = 0;
 	int i;
 
-	/* Without this we cannot rely on waitpid() to tell
-	 * what happened to our children.
-	 */
-	signal(SIGCHLD, SIG_DFL);
+	child_handler(0);
 
 	for (i = 1; i < argc; i++) {
 		char *arg = argv[i];

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] git-daemon: rewrite kindergarden
  2008-08-13  8:43 ` [PATCH 3/3] git-daemon: rewrite kindergarden Stephen R. van den Berg
@ 2008-08-13  8:58   ` Stephen R. van den Berg
  2008-08-13  9:00   ` [PATCH] " Stephen R. van den Berg
  2008-08-13  9:05   ` [PATCH 3/3] git-daemon: rewrite kindergarden Petr Baudis
  2 siblings, 0 replies; 16+ messages in thread
From: Stephen R. van den Berg @ 2008-08-13  8:58 UTC (permalink / raw)
  To: git

>+			if (max_connections<=0)
>+				max_connections=0;	        /* unlimited */

Hmmm, slight formatting error.  Sorry about that.  I'll resend this
last patch.
-- 
Sincerely,
           Stephen R. van den Berg.

"And now for something *completely* different!"

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH] git-daemon: rewrite kindergarden
  2008-08-13  8:43 ` [PATCH 3/3] git-daemon: rewrite kindergarden Stephen R. van den Berg
  2008-08-13  8:58   ` Stephen R. van den Berg
@ 2008-08-13  9:00   ` Stephen R. van den Berg
  2008-08-13 10:40     ` [PATCH] git-daemon: rewrite kindergarden, new option --max-connections Stephen R. van den Berg
  2008-08-13  9:05   ` [PATCH 3/3] git-daemon: rewrite kindergarden Petr Baudis
  2 siblings, 1 reply; 16+ messages in thread
From: Stephen R. van den Berg @ 2008-08-13  9:00 UTC (permalink / raw)
  To: git

Get rid of the silly fixed array of children and make
max-connections dynamic and configurable in the process.
Fix the killing code to actually be smart instead of the
pseudo-random mess.
Avoid forking if too busy already.

Signed-off-by: Stephen R. van den Berg <srb@cuci.nl>
---

 Documentation/git-daemon.txt |    9 +-
 daemon.c                     |  215 +++++++++++++++---------------------------
 2 files changed, 84 insertions(+), 140 deletions(-)

diff --git a/Documentation/git-daemon.txt b/Documentation/git-daemon.txt
index 4ba4b75..b08a08c 100644
--- a/Documentation/git-daemon.txt
+++ b/Documentation/git-daemon.txt
@@ -9,8 +9,9 @@ SYNOPSIS
 --------
 [verse]
 'git daemon' [--verbose] [--syslog] [--export-all]
-	     [--timeout=n] [--init-timeout=n] [--strict-paths]
-	     [--base-path=path] [--user-path | --user-path=path]
+	     [--timeout=n] [--init-timeout=n] [--max-connections=n]
+	     [--strict-paths] [--base-path=path] [--base-path-relaxed]
+	     [--user-path | --user-path=path]
 	     [--interpolated-path=pathtemplate]
 	     [--reuseaddr] [--detach] [--pid-file=file]
 	     [--enable=service] [--disable=service]
@@ -99,6 +100,10 @@ OPTIONS
 	it takes for the server to process the sub-request and time spent
 	waiting for next client's request.
 
+--max-connections::
+	Maximum number of concurrent clients, defaults to 32.  Set it to
+	zero for no limit.
+
 --syslog::
 	Log to syslog instead of stderr. Note that this option does not imply
 	--verbose, thus by default only error conditions will be logged.
diff --git a/daemon.c b/daemon.c
index 19d620c..a04ad1c 100644
--- a/daemon.c
+++ b/daemon.c
@@ -19,8 +19,8 @@ static int reuseaddr;
 
 static const char daemon_usage[] =
 "git daemon [--verbose] [--syslog] [--export-all]\n"
-"           [--timeout=n] [--init-timeout=n] [--strict-paths]\n"
-"           [--base-path=path] [--base-path-relaxed]\n"
+"           [--timeout=n] [--init-timeout=n] [--max-connections=n]\n"
+"           [--strict-paths] [--base-path=path] [--base-path-relaxed]\n"
 "           [--user-path | --user-path=path]\n"
 "           [--interpolated-path=path]\n"
 "           [--reuseaddr] [--detach] [--pid-file=file]\n"
@@ -584,40 +584,37 @@ static int execute(struct sockaddr *addr)
 	return -1;
 }
 
+static int max_connections = 32;
 
-/*
- * We count spawned/reaped separately, just to avoid any
- * races when updating them from signals. The SIGCHLD handler
- * will only update children_reaped, and the fork logic will
- * only update children_spawned.
- *
- * MAX_CHILDREN should be a power-of-two to make the modulus
- * operation cheap. It should also be at least twice
- * the maximum number of connections we will ever allow.
- */
-#define MAX_CHILDREN 128
-
-static int max_connections = 25;
-
-/* These are updated by the signal handler */
-static volatile unsigned int children_reaped;
-static pid_t dead_child[MAX_CHILDREN];
-
-/* These are updated by the main loop */
-static unsigned int children_spawned;
-static unsigned int children_deleted;
+static unsigned int live_children;
 
 static struct child {
+	struct child*next;
 	pid_t pid;
-	int addrlen;
 	struct sockaddr_storage address;
-} live_child[MAX_CHILDREN];
+} *firstborn;
 
-static void add_child(int idx, pid_t pid, struct sockaddr *addr, int addrlen)
+static void add_child(pid_t pid, struct sockaddr *addr, int addrlen)
 {
-	live_child[idx].pid = pid;
-	live_child[idx].addrlen = addrlen;
-	memcpy(&live_child[idx].address, addr, addrlen);
+	struct child*newborn;
+	newborn = xcalloc(1, sizeof *newborn);
+	if (newborn) {
+		struct child **cradle, *blanket;
+
+		live_children++;
+		newborn->pid = pid;
+		memcpy(&newborn->address, addr, addrlen);
+		for (cradle = &firstborn;
+		     (blanket = *cradle);
+		     cradle = &blanket->next)
+			if (!memcmp(&blanket->address, &newborn->address,
+				   sizeof newborn->address))
+				break;
+		newborn->next = blanket;
+		*cradle = newborn;
+	}
+	else
+		logerror("Out of memory spawning new child");
 }
 
 /*
@@ -626,142 +623,78 @@ static void add_child(int idx, pid_t pid, struct sockaddr *addr, int addrlen)
  * We move everything up by one, since the new "deleted" will
  * be one higher.
  */
-static void remove_child(pid_t pid, unsigned deleted, unsigned spawned)
+static void remove_child(pid_t pid)
 {
-	struct child n;
+	struct child **cradle, *blanket;
 
-	deleted %= MAX_CHILDREN;
-	spawned %= MAX_CHILDREN;
-	if (live_child[deleted].pid == pid) {
-		live_child[deleted].pid = -1;
-		return;
-	}
-	n = live_child[deleted];
-	for (;;) {
-		struct child m;
-		deleted = (deleted + 1) % MAX_CHILDREN;
-		if (deleted == spawned)
-			die("could not find dead child %d\n", pid);
-		m = live_child[deleted];
-		live_child[deleted] = n;
-		if (m.pid == pid)
-			return;
-		n = m;
-	}
+	for (cradle = &firstborn; (blanket = *cradle); cradle = &blanket->next)
+		if (blanket->pid == pid) {
+			*cradle = blanket->next;
+			live_children--;
+			free(blanket);
+			break;
+		}
 }
 
 /*
  * This gets called if the number of connections grows
  * past "max_connections".
  *
- * We _should_ start off by searching for connections
- * from the same IP, and if there is some address wth
- * multiple connections, we should kill that first.
- *
- * As it is, we just "randomly" kill 25% of the connections,
- * and our pseudo-random generator sucks too. I have no
- * shame.
- *
- * Really, this is just a place-holder for a _real_ algorithm.
+ * We kill the newest connection from a duplicate IP.
  */
-static void kill_some_children(int signo, unsigned start, unsigned stop)
+static void kill_some_child()
 {
-	start %= MAX_CHILDREN;
-	stop %= MAX_CHILDREN;
-	while (start != stop) {
-		if (!(start & 3))
-			kill(live_child[start].pid, signo);
-		start = (start + 1) % MAX_CHILDREN;
-	}
-}
-
-static void check_dead_children(void)
-{
-	unsigned spawned, reaped, deleted;
-
-	for (;;) {
-	    int status;
-	    pid_t pid = waitpid(-1, &status, WNOHANG);
-	
-	    if (pid > 0) {
-	        unsigned reaped = children_reaped;
-	        if (!WIFEXITED(status) || WEXITSTATUS(status) > 0)
-	            pid = -pid;
-	        dead_child[reaped % MAX_CHILDREN] = pid;
-	        children_reaped = reaped + 1;
-	        continue;
-	    }
-	    break;
-	}
+	const struct child *blanket;
 
-	spawned = children_spawned;
-	reaped = children_reaped;
-	deleted = children_deleted;
+	if ((blanket = firstborn)) {
+		const struct child *next;
 
-	while (deleted < reaped) {
-		pid_t pid = dead_child[deleted % MAX_CHILDREN];
-		const char *dead = pid < 0 ? " (with error)" : "";
-
-		if (pid < 0)
-			pid = -pid;
-
-		/* XXX: Custom logging, since we don't wanna getpid() */
-		if (verbose) {
-			if (log_syslog)
-				syslog(LOG_INFO, "[%d] Disconnected%s",
-						pid, dead);
-			else
-				fprintf(stderr, "[%d] Disconnected%s\n",
-						pid, dead);
-		}
-		remove_child(pid, deleted, spawned);
-		deleted++;
+		for (; (next = blanket->next); blanket = next)
+			if (!memcmp(&blanket->address, &next->address,
+				   sizeof next->address)) {
+				kill(blanket->pid, SIGTERM);
+				break;
+			}
 	}
-	children_deleted = deleted;
 }
 
-static void check_max_connections(void)
+static void check_dead_children(void)
 {
-	for (;;) {
-		int active;
-		unsigned spawned, deleted;
-
-		check_dead_children();
-
-		spawned = children_spawned;
-		deleted = children_deleted;
-
-		active = spawned - deleted;
-		if (active <= max_connections)
-			break;
-
-		/* Kill some unstarted connections with SIGTERM */
-		kill_some_children(SIGTERM, deleted, spawned);
-		if (active <= max_connections << 1)
-			break;
+	int status;
+	pid_t pid;
 
-		/* If the SIGTERM thing isn't helping use SIGKILL */
-		kill_some_children(SIGKILL, deleted, spawned);
-		sleep(1);
+	while ((pid = waitpid(-1, &status, WNOHANG))>0) {
+		const char *dead = "";
+		remove_child(pid);
+		if (!WIFEXITED(status) || WEXITSTATUS(status) > 0)
+			dead = " (with error)";
+		loginfo("[%d] Disconnected%s", (int)pid, dead);
 	}
 }
 
 static void handle(int incoming, struct sockaddr *addr, int addrlen)
 {
-	pid_t pid = fork();
+	pid_t pid;
 
-	if (pid) {
-		unsigned idx;
+	if (max_connections && live_children >= max_connections) {
+		kill_some_child();
+		sleep(1);			 /* give it some time to die */
+		check_dead_children();
+		if (live_children >= max_connections) {
+			close(incoming);
+			logerror("Too many children, dropping connection");
+			return;
+		}
+	}
 
+	if ((pid = fork())) {
 		close(incoming);
-		if (pid < 0)
+		if (pid < 0) {
+			logerror("Couldn't fork %s", strerror(errno));
 			return;
+		}
 
-		idx = children_spawned % MAX_CHILDREN;
-		children_spawned++;
-		add_child(idx, pid, addr, addrlen);
-
-		check_max_connections();
+		add_child(pid, addr, addrlen);
 		return;
 	}
 
@@ -1081,6 +1014,12 @@ int main(int argc, char **argv)
 			init_timeout = atoi(arg+15);
 			continue;
 		}
+		if (!prefixcmp(arg, "--max-connections=")) {
+			max_connections = atoi(arg+18);
+			if (max_connections < 0)
+				max_connections = 0;	        /* unlimited */
+			continue;
+		}
 		if (!strcmp(arg, "--strict-paths")) {
 			strict_paths = 1;
 			continue;

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] git-daemon: rewrite kindergarden
  2008-08-13  8:43 ` [PATCH 3/3] git-daemon: rewrite kindergarden Stephen R. van den Berg
  2008-08-13  8:58   ` Stephen R. van den Berg
  2008-08-13  9:00   ` [PATCH] " Stephen R. van den Berg
@ 2008-08-13  9:05   ` Petr Baudis
  2008-08-13 10:37     ` Stephen R. van den Berg
  2 siblings, 1 reply; 16+ messages in thread
From: Petr Baudis @ 2008-08-13  9:05 UTC (permalink / raw)
  To: Stephen R. van den Berg; +Cc: git

On Wed, Aug 13, 2008 at 10:43:31AM +0200, Stephen R. van den Berg wrote:
> Get rid of the silly fixed array of children and make
> max-connections dynamic and configurable in the process.
> Fix the killing code to actually be smart instead of the
> pseudo-random mess.
> Avoid forking if too busy already.
> 
> Signed-off-by: Stephen R. van den Berg <srb@cuci.nl>

I would somehow mention the string '--max-connections' in the log
message; that is really useful when looking when some option was
introduced.

> diff --git a/daemon.c b/daemon.c
> index 19d620c..a7a735c 100644
> --- a/daemon.c
> +++ b/daemon.c
> @@ -584,40 +584,37 @@ static int execute(struct sockaddr *addr)
>  	return -1;
>  }
>  
> +static int max_connections = 32;
>  
> -/*
> - * We count spawned/reaped separately, just to avoid any
> - * races when updating them from signals. The SIGCHLD handler
> - * will only update children_reaped, and the fork logic will
> - * only update children_spawned.
> - *
> - * MAX_CHILDREN should be a power-of-two to make the modulus
> - * operation cheap. It should also be at least twice
> - * the maximum number of connections we will ever allow.
> - */
> -#define MAX_CHILDREN 128
> -
> -static int max_connections = 25;
> -
> -/* These are updated by the signal handler */
> -static volatile unsigned int children_reaped;
> -static pid_t dead_child[MAX_CHILDREN];
> -
> -/* These are updated by the main loop */
> -static unsigned int children_spawned;
> -static unsigned int children_deleted;
> +static unsigned int live_children;
>  
>  static struct child {
> +	struct child*next;

struct child *next;

>  	pid_t pid;
> -	int addrlen;
>  	struct sockaddr_storage address;
> -} live_child[MAX_CHILDREN];
> +} *firstborn;
>  
> -static void add_child(int idx, pid_t pid, struct sockaddr *addr, int addrlen)
> +static void add_child(pid_t pid, struct sockaddr *addr, int addrlen)
>  {
> -	live_child[idx].pid = pid;
> -	live_child[idx].addrlen = addrlen;
> -	memcpy(&live_child[idx].address, addr, addrlen);
> +	struct child*newborn;

struct child *newborn;

> +	newborn = xcalloc(1, sizeof *newborn);
> +	if (newborn) {
> +		struct child **cradle, *blanket;
> +
> +		live_children++;
> +		newborn->pid = pid;
> +		memcpy(&newborn->address, addr, addrlen);
> +		for (cradle = &firstborn;
> +		     (blanket = *cradle);
> +		     cradle = &blanket->next)
> +			if (!memcmp(&blanket->address, &newborn->address,
> +				   sizeof newborn->address))
> +				break;
> +		newborn->next = blanket;
> +		*cradle = newborn;

So, I can always excuse myself by mentioning that it's early in the
morning (for me ;-) but what do you actually need the blanket for, in
this warm digital world?

The current for statement is *really* cryptic... What about:

+		struct child **cradle;
+
+		live_children++;
+		newborn->pid = pid;
+		memcpy(&newborn->address, addr, addrlen);
+		for (cradle = &firstborn; *cradle; cradle = &(*cradle)->next)
+			if (!memcmp(&(*cradle)->address, &newborn->address,
+				   sizeof newborn->address))
+				break;
+		newborn->next = *cradle;
+		*cradle = newborn;

> +	}
> +	else
> +		logerror("Out of memory spawning new child");
>  }
>  
>  /*
> @@ -626,142 +623,78 @@ static void add_child(int idx, pid_t pid, struct sockaddr *addr, int addrlen)
>   * We move everything up by one, since the new "deleted" will
>   * be one higher.
>   */
> -static void remove_child(pid_t pid, unsigned deleted, unsigned spawned)
> +static void remove_child(pid_t pid)
>  {
> -	struct child n;
> +	struct child **cradle, *blanket;
>  
> +	for (cradle = &firstborn; (blanket = *cradle); cradle = &blanket->next)
> +		if (blanket->pid == pid) {
> +			*cradle = blanket->next;
> +			live_children--;
> +			free(blanket);
> +			break;
> +		}
>  }

Same here. You just need a temporary variable in the innermost block.

> +static void kill_some_child()
>  {
> +	const struct child *blanket;
>  
> +	if ((blanket = firstborn)) {
> +		const struct child *next;
>  
> +		for (; (next = blanket->next); blanket = next)
> +			if (!memcmp(&blanket->address, &next->address,
> +				   sizeof next->address)) {
> +				kill(blanket->pid, SIGTERM);
> +				break;
> +			}

I think using cradle instead of blanket in this for loop would be more
consistent, if perhaps somewhat more morbid.

-- 
				Petr "Pasky" Baudis
The next generation of interesting software will be done
on the Macintosh, not the IBM PC.  -- Bill Gates

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] git-daemon: rewrite kindergarden
  2008-08-13  9:05   ` [PATCH 3/3] git-daemon: rewrite kindergarden Petr Baudis
@ 2008-08-13 10:37     ` Stephen R. van den Berg
  0 siblings, 0 replies; 16+ messages in thread
From: Stephen R. van den Berg @ 2008-08-13 10:37 UTC (permalink / raw)
  To: Petr Baudis; +Cc: git

Petr Baudis wrote:
>On Wed, Aug 13, 2008 at 10:43:31AM +0200, Stephen R. van den Berg wrote:
>I would somehow mention the string '--max-connections' in the log
>message; that is really useful when looking when some option was
>introduced.

Fixed.

>> +	struct child*next;

>struct child *next;

>> +	struct child*newborn;

>struct child *newborn;

Fixed.

>So, I can always excuse myself by mentioning that it's early in the
>morning (for me ;-) but what do you actually need the blanket for, in
>this warm digital world?

This seems to be a case of code restructuring and forgetting to
optimise.  There was a reason for the blanket before, but I am warming
up to the idea to go without blanket in this case.

>The current for statement is *really* cryptic... What about:

Probably ok, I'll refactor it accordingly.

>> -static void remove_child(pid_t pid, unsigned deleted, unsigned spawned)
>> +static void remove_child(pid_t pid)
>>  {
>> -	struct child n;
>> +	struct child **cradle, *blanket;

>> +	for (cradle = &firstborn; (blanket = *cradle); cradle = &blanket->next)
>> +		if (blanket->pid == pid) {
>> +			*cradle = blanket->next;
>> +			live_children--;
>> +			free(blanket);
>> +			break;
>> +		}

>Same here. You just need a temporary variable in the innermost block.

Yes, but here using the blanket eliminates some * operators.
So I'd prefer to keep it.

>> +static void kill_some_child()
>>  {
>> +	const struct child *blanket;

>> +	if ((blanket = firstborn)) {
>> +		const struct child *next;

>> +		for (; (next = blanket->next); blanket = next)
>> +			if (!memcmp(&blanket->address, &next->address,
>> +				   sizeof next->address)) {
>> +				kill(blanket->pid, SIGTERM);
>> +				break;
>> +			}

>I think using cradle instead of blanket in this for loop would be more
>consistent, if perhaps somewhat more morbid.

I kept the cradle for the double indirections (you have to actually
reach deeper into a cradle), a blanket can be touched without reaching
deep, so I'd prefer to keep a blanket here.
-- 
Sincerely,
           Stephen R. van den Berg.

"And now for something *completely* different!"

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH] git-daemon: rewrite kindergarden, new option --max-connections
  2008-08-13  9:00   ` [PATCH] " Stephen R. van den Berg
@ 2008-08-13 10:40     ` Stephen R. van den Berg
  0 siblings, 0 replies; 16+ messages in thread
From: Stephen R. van den Berg @ 2008-08-13 10:40 UTC (permalink / raw)
  To: git

Get rid of the silly fixed array of children and make
max-connections dynamic and configurable in the process.
Fix the killing code to actually be smart instead of the
pseudo-random mess.
Avoid forking if too busy already.

Signed-off-by: Stephen R. van den Berg <srb@cuci.nl>
---

 Documentation/git-daemon.txt |    9 +-
 daemon.c                     |  213 +++++++++++++++---------------------------
 2 files changed, 82 insertions(+), 140 deletions(-)

diff --git a/Documentation/git-daemon.txt b/Documentation/git-daemon.txt
index 4ba4b75..b08a08c 100644
--- a/Documentation/git-daemon.txt
+++ b/Documentation/git-daemon.txt
@@ -9,8 +9,9 @@ SYNOPSIS
 --------
 [verse]
 'git daemon' [--verbose] [--syslog] [--export-all]
-	     [--timeout=n] [--init-timeout=n] [--strict-paths]
-	     [--base-path=path] [--user-path | --user-path=path]
+	     [--timeout=n] [--init-timeout=n] [--max-connections=n]
+	     [--strict-paths] [--base-path=path] [--base-path-relaxed]
+	     [--user-path | --user-path=path]
 	     [--interpolated-path=pathtemplate]
 	     [--reuseaddr] [--detach] [--pid-file=file]
 	     [--enable=service] [--disable=service]
@@ -99,6 +100,10 @@ OPTIONS
 	it takes for the server to process the sub-request and time spent
 	waiting for next client's request.
 
+--max-connections::
+	Maximum number of concurrent clients, defaults to 32.  Set it to
+	zero for no limit.
+
 --syslog::
 	Log to syslog instead of stderr. Note that this option does not imply
 	--verbose, thus by default only error conditions will be logged.
diff --git a/daemon.c b/daemon.c
index 19d620c..05a4328 100644
--- a/daemon.c
+++ b/daemon.c
@@ -19,8 +19,8 @@ static int reuseaddr;
 
 static const char daemon_usage[] =
 "git daemon [--verbose] [--syslog] [--export-all]\n"
-"           [--timeout=n] [--init-timeout=n] [--strict-paths]\n"
-"           [--base-path=path] [--base-path-relaxed]\n"
+"           [--timeout=n] [--init-timeout=n] [--max-connections=n]\n"
+"           [--strict-paths] [--base-path=path] [--base-path-relaxed]\n"
 "           [--user-path | --user-path=path]\n"
 "           [--interpolated-path=path]\n"
 "           [--reuseaddr] [--detach] [--pid-file=file]\n"
@@ -584,40 +584,35 @@ static int execute(struct sockaddr *addr)
 	return -1;
 }
 
+static int max_connections = 32;
 
-/*
- * We count spawned/reaped separately, just to avoid any
- * races when updating them from signals. The SIGCHLD handler
- * will only update children_reaped, and the fork logic will
- * only update children_spawned.
- *
- * MAX_CHILDREN should be a power-of-two to make the modulus
- * operation cheap. It should also be at least twice
- * the maximum number of connections we will ever allow.
- */
-#define MAX_CHILDREN 128
-
-static int max_connections = 25;
-
-/* These are updated by the signal handler */
-static volatile unsigned int children_reaped;
-static pid_t dead_child[MAX_CHILDREN];
-
-/* These are updated by the main loop */
-static unsigned int children_spawned;
-static unsigned int children_deleted;
+static unsigned int live_children;
 
 static struct child {
+	struct child *next;
 	pid_t pid;
-	int addrlen;
 	struct sockaddr_storage address;
-} live_child[MAX_CHILDREN];
+} *firstborn;
 
-static void add_child(int idx, pid_t pid, struct sockaddr *addr, int addrlen)
+static void add_child(pid_t pid, struct sockaddr *addr, int addrlen)
 {
-	live_child[idx].pid = pid;
-	live_child[idx].addrlen = addrlen;
-	memcpy(&live_child[idx].address, addr, addrlen);
+	struct child *newborn;
+	newborn = xcalloc(1, sizeof *newborn);
+	if (newborn) {
+		struct child **cradle;
+
+		live_children++;
+		newborn->pid = pid;
+		memcpy(&newborn->address, addr, addrlen);
+		for (cradle = &firstborn; *cradle; cradle = &(*cradle)->next)
+			if (!memcmp(&(*cradle)->address, &newborn->address,
+				   sizeof newborn->address))
+				break;
+		newborn->next = *cradle;
+		*cradle = newborn;
+	}
+	else
+		logerror("Out of memory spawning new child");
 }
 
 /*
@@ -626,142 +621,78 @@ static void add_child(int idx, pid_t pid, struct sockaddr *addr, int addrlen)
  * We move everything up by one, since the new "deleted" will
  * be one higher.
  */
-static void remove_child(pid_t pid, unsigned deleted, unsigned spawned)
+static void remove_child(pid_t pid)
 {
-	struct child n;
+	struct child **cradle, *blanket;
 
-	deleted %= MAX_CHILDREN;
-	spawned %= MAX_CHILDREN;
-	if (live_child[deleted].pid == pid) {
-		live_child[deleted].pid = -1;
-		return;
-	}
-	n = live_child[deleted];
-	for (;;) {
-		struct child m;
-		deleted = (deleted + 1) % MAX_CHILDREN;
-		if (deleted == spawned)
-			die("could not find dead child %d\n", pid);
-		m = live_child[deleted];
-		live_child[deleted] = n;
-		if (m.pid == pid)
-			return;
-		n = m;
-	}
+	for (cradle = &firstborn; (blanket = *cradle); cradle = &blanket->next)
+		if (blanket->pid == pid) {
+			*cradle = blanket->next;
+			live_children--;
+			free(blanket);
+			break;
+		}
 }
 
 /*
  * This gets called if the number of connections grows
  * past "max_connections".
  *
- * We _should_ start off by searching for connections
- * from the same IP, and if there is some address wth
- * multiple connections, we should kill that first.
- *
- * As it is, we just "randomly" kill 25% of the connections,
- * and our pseudo-random generator sucks too. I have no
- * shame.
- *
- * Really, this is just a place-holder for a _real_ algorithm.
+ * We kill the newest connection from a duplicate IP.
  */
-static void kill_some_children(int signo, unsigned start, unsigned stop)
+static void kill_some_child()
 {
-	start %= MAX_CHILDREN;
-	stop %= MAX_CHILDREN;
-	while (start != stop) {
-		if (!(start & 3))
-			kill(live_child[start].pid, signo);
-		start = (start + 1) % MAX_CHILDREN;
-	}
-}
-
-static void check_dead_children(void)
-{
-	unsigned spawned, reaped, deleted;
-
-	for (;;) {
-	    int status;
-	    pid_t pid = waitpid(-1, &status, WNOHANG);
-	
-	    if (pid > 0) {
-	        unsigned reaped = children_reaped;
-	        if (!WIFEXITED(status) || WEXITSTATUS(status) > 0)
-	            pid = -pid;
-	        dead_child[reaped % MAX_CHILDREN] = pid;
-	        children_reaped = reaped + 1;
-	        continue;
-	    }
-	    break;
-	}
+	const struct child *blanket;
 
-	spawned = children_spawned;
-	reaped = children_reaped;
-	deleted = children_deleted;
+	if ((blanket = firstborn)) {
+		const struct child *next;
 
-	while (deleted < reaped) {
-		pid_t pid = dead_child[deleted % MAX_CHILDREN];
-		const char *dead = pid < 0 ? " (with error)" : "";
-
-		if (pid < 0)
-			pid = -pid;
-
-		/* XXX: Custom logging, since we don't wanna getpid() */
-		if (verbose) {
-			if (log_syslog)
-				syslog(LOG_INFO, "[%d] Disconnected%s",
-						pid, dead);
-			else
-				fprintf(stderr, "[%d] Disconnected%s\n",
-						pid, dead);
-		}
-		remove_child(pid, deleted, spawned);
-		deleted++;
+		for (; (next = blanket->next); blanket = next)
+			if (!memcmp(&blanket->address, &next->address,
+				   sizeof next->address)) {
+				kill(blanket->pid, SIGTERM);
+				break;
+			}
 	}
-	children_deleted = deleted;
 }
 
-static void check_max_connections(void)
+static void check_dead_children(void)
 {
-	for (;;) {
-		int active;
-		unsigned spawned, deleted;
-
-		check_dead_children();
-
-		spawned = children_spawned;
-		deleted = children_deleted;
-
-		active = spawned - deleted;
-		if (active <= max_connections)
-			break;
-
-		/* Kill some unstarted connections with SIGTERM */
-		kill_some_children(SIGTERM, deleted, spawned);
-		if (active <= max_connections << 1)
-			break;
+	int status;
+	pid_t pid;
 
-		/* If the SIGTERM thing isn't helping use SIGKILL */
-		kill_some_children(SIGKILL, deleted, spawned);
-		sleep(1);
+	while ((pid = waitpid(-1, &status, WNOHANG))>0) {
+		const char *dead = "";
+		remove_child(pid);
+		if (!WIFEXITED(status) || WEXITSTATUS(status) > 0)
+			dead = " (with error)";
+		loginfo("[%d] Disconnected%s", (int)pid, dead);
 	}
 }
 
 static void handle(int incoming, struct sockaddr *addr, int addrlen)
 {
-	pid_t pid = fork();
+	pid_t pid;
 
-	if (pid) {
-		unsigned idx;
+	if (max_connections && live_children >= max_connections) {
+		kill_some_child();
+		sleep(1);			 /* give it some time to die */
+		check_dead_children();
+		if (live_children >= max_connections) {
+			close(incoming);
+			logerror("Too many children, dropping connection");
+			return;
+		}
+	}
 
+	if ((pid = fork())) {
 		close(incoming);
-		if (pid < 0)
+		if (pid < 0) {
+			logerror("Couldn't fork %s", strerror(errno));
 			return;
+		}
 
-		idx = children_spawned % MAX_CHILDREN;
-		children_spawned++;
-		add_child(idx, pid, addr, addrlen);
-
-		check_max_connections();
+		add_child(pid, addr, addrlen);
 		return;
 	}
 
@@ -1081,6 +1012,12 @@ int main(int argc, char **argv)
 			init_timeout = atoi(arg+15);
 			continue;
 		}
+		if (!prefixcmp(arg, "--max-connections=")) {
+			max_connections = atoi(arg+18);
+			if (max_connections < 0)
+				max_connections = 0;	        /* unlimited */
+			continue;
+		}
 		if (!strcmp(arg, "--strict-paths")) {
 			strict_paths = 1;
 			continue;

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] git-daemon: logging done right
  2008-08-13  8:43 [PATCH 1/3] git-daemon: logging done right Stephen R. van den Berg
  2008-08-13  8:43 ` [PATCH 3/3] git-daemon: rewrite kindergarden Stephen R. van den Berg
  2008-08-13  8:43 ` [PATCH 2/3] git-daemon: make the signal handler almost a no-op Stephen R. van den Berg
@ 2008-08-13 23:13 ` Junio C Hamano
  2 siblings, 0 replies; 16+ messages in thread
From: Junio C Hamano @ 2008-08-13 23:13 UTC (permalink / raw)
  To: Stephen R. van den Berg; +Cc: git

"Stephen R. van den Berg" <srb@cuci.nl> writes:

> Make git-daemon a proper syslogging citizen with PID-info.

In general, please try to avoid using the wording like that in the commit
log message, _without_ defining what you think is the proper way and why
you think the current behaviour is improper.

Do you mean "if we use syslog(), we can ask it to add pid information to
the output and we do not have to prepend pid information ourselves"?

For the same reason, "logging done right" is somewhat a suboptimal title
for the patch.  If the title describes concisely the reason why the new
way was chosen by you over the old way, and the readers can judge for
themselves if they agree with your reasoning and if the new way is better.
For example, you could have said "git-daemon: let syslog() to add our pid
to the messages".

Yes, it is _not_ the only change you are making with this patch, and the
example message won't describe what you did fully.  It may be an
indication that you are doing too many things in one patch, unless other
changes are "well, they are not a big deal but while we are at it why not
fix them" kind of changes.

> Simplify the overzealous double buffering in the logroutine.

Is there any overzealous double buffering involved?  I thought it just
does s*printf() twice into the single buf[]?  Are you referring to the
trick of setting stderr to line-buffered output?  It does remove the need
for these two s*printf(), and is an improvement.

I am however not as sure as you seem to be that these two changes make the
difference between "done right" and "done wrong" --- at most I'd say that
these fall into "improve the way the log is done" category.

> Call logerror() instead of error().

Calls to die() are covered by setting die-routine to daemon_die(), but the
error() calls are lost to bitbucket.  This is a real fix to keep otherwise
lost error message to the log stream.

Don't you want to also send the "poll failed" error to the log stream as
well?

Thanks.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] git-daemon: make the signal handler almost a no-op
  2008-08-13  8:43 ` [PATCH 2/3] git-daemon: make the signal handler almost a no-op Stephen R. van den Berg
@ 2008-08-14  0:09   ` Junio C Hamano
  2008-08-14  0:18     ` Stephen R. van den Berg
  2008-08-14 13:41   ` Johannes Schindelin
  1 sibling, 1 reply; 16+ messages in thread
From: Junio C Hamano @ 2008-08-14  0:09 UTC (permalink / raw)
  To: Stephen R. van den Berg; +Cc: git, Johannes Schindelin

"Stephen R. van den Berg" <srb@cuci.nl> writes:

> by exploiting the fact that systemcalls get interrupted by signals;
> and even they aren't, all zombies will be collected before the next
> accept().

Dscho may want to say something about "even they aren't..." part, after he
comes back to the keyboard.

> Fix another error() -> logerror() call.

which should have been in 1/3, I suppose.

> @@ -1036,10 +1034,7 @@ int main(int argc, char **argv)
>  	gid_t gid = 0;
>  	int i;
>  
> -	/* Without this we cannot rely on waitpid() to tell
> -	 * what happened to our children.
> -	 */
> -	signal(SIGCHLD, SIG_DFL);
> +	child_handler(0);

Why?

child_handler() logically is a two step process:

 * We are just informed that somebody died; let's do something about the
   corpse;

 * On some systems we need to rearm signals once they fired, so let's do
   that if necessary.

With your change, the first part happens to be almost no-op, but I do not
think it justifies this hunk.

After all, we might even want to do something like:

	static void child_handler(int signo)
        {
        	if (USE_SYSV_SIGNAL_SEMANTICS)
                	signal(SIGCHLD, child_handler);
	}

and have the compiler optimize out the signal rearming with

	cc CFLAGS=-DUSE_SYSV_SIGNAL_SEMANTICS=0

on suitable platforms in the future.  But you still want the initial
signal set-up to happen unconditionally.

At this point, we aren't informed by the system that somebody died, and we
would want to arm the signal regardless of the platform's signal semantics.

The rest of the patch looked sane, although I did not read it very
carefully.

Thanks.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] git-daemon: make the signal handler almost a no-op
  2008-08-14  0:09   ` Junio C Hamano
@ 2008-08-14  0:18     ` Stephen R. van den Berg
  2008-08-14  1:09       ` Junio C Hamano
  0 siblings, 1 reply; 16+ messages in thread
From: Stephen R. van den Berg @ 2008-08-14  0:18 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Johannes Schindelin

Junio C Hamano wrote:
>"Stephen R. van den Berg" <srb@cuci.nl> writes:
>> by exploiting the fact that systemcalls get interrupted by signals;
>> and even they aren't, all zombies will be collected before the next
>> accept().

>Dscho may want to say something about "even they aren't..." part, after he
>comes back to the keyboard.

That should have read "even if they aren't".  Nonetheless, I don't know
systems where it doesn't work this way, but even if a system resisted,
the problem is mitigated by the fact that we reap the children before
every accept.

>> Fix another error() -> logerror() call.

>which should have been in 1/3, I suppose.

Sort of, yes, it was a bit messy to get it out in one piece.

>> @@ -1036,10 +1034,7 @@ int main(int argc, char **argv)
>>  	gid_t gid = 0;
>>  	int i;

>> -	/* Without this we cannot rely on waitpid() to tell
>> -	 * what happened to our children.
>> -	 */
>> -	signal(SIGCHLD, SIG_DFL);
>> +	child_handler(0);

>Why?

child_handler() now does barely more than setup the signal handler,
which is exactly what we want to do here.

>With your change, the first part happens to be almost no-op, but I do not
>think it justifies this hunk.

>After all, we might even want to do something like:

>	static void child_handler(int signo)
>        {
>        	if (USE_SYSV_SIGNAL_SEMANTICS)
>                	signal(SIGCHLD, child_handler);

>and have the compiler optimize out the signal rearming with

>	cc CFLAGS=-DUSE_SYSV_SIGNAL_SEMANTICS=0

In return I ask: why?
There is no particular performance reason to optimise this.
So in order to keep the code simpler, it might make an extra unneeded
systemcall on some systems when the signal is processed.  I don't think
it's worth our while to optimise this further.
-- 
Sincerely,
           Stephen R. van den Berg.

"And now for something *completely* different!"

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] git-daemon: make the signal handler almost a no-op
  2008-08-14  0:18     ` Stephen R. van den Berg
@ 2008-08-14  1:09       ` Junio C Hamano
  2008-08-14  7:47         ` Stephen R. van den Berg
  0 siblings, 1 reply; 16+ messages in thread
From: Junio C Hamano @ 2008-08-14  1:09 UTC (permalink / raw)
  To: Stephen R. van den Berg; +Cc: git, Johannes Schindelin

"Stephen R. van den Berg" <srb@cuci.nl> writes:

>>> @@ -1036,10 +1034,7 @@ int main(int argc, char **argv)
>>>  	gid_t gid = 0;
>>>  	int i;
>
>>> -	/* Without this we cannot rely on waitpid() to tell
>>> -	 * what happened to our children.
>>> -	 */
>>> -	signal(SIGCHLD, SIG_DFL);
>>> +	child_handler(0);
>
>>Why?
>
> child_handler() now does barely more than setup the signal handler,
> which is exactly what we want to do here.
>
>>With your change, the first part happens to be almost no-op, but I do not
>>think it justifies this hunk.
>
>>After all, we might even want to do something like:
>
>>	static void child_handler(int signo)
>>        {
>>        	if (USE_SYSV_SIGNAL_SEMANTICS)
>>                	signal(SIGCHLD, child_handler);
>
>>and have the compiler optimize out the signal rearming with
>
>>	cc CFLAGS=-DUSE_SYSV_SIGNAL_SEMANTICS=0
>
> In return I ask: why?

Please read the part you omitted from your quote again.

I agree it would be very meaningless change as an optimization, but I am
concerned more about robustness and what makes sense.

Do you agree that "child_handler()" is a signal handler to handle SIGCHLD,
and such a signal handler conceptually consists of two parts? i.e.

	static void child_handler()
        {
        	reap_dead_children();
                rearm_signal_as_needed();
	}

Your argument is it is Ok to call this function when you are arming the
signal for the first time, because reap_dead_children() happens to be
empty, and your rearm_signal_as_needed() happens to be the same as
arm_signal_always().

Yes, it happens to be _Ok_ now.  But is it an improvement in the longer
term?  I do not think so.

I do not see why you think it is better to rely on these two assumptions
than being explicit and say "we set up the signal for the first time
always on any platform", especially when the latter is much more direct
way to say what your intention is.  Or are you gaining something by not
explicitly calling signal() for the first time?  I may be missing some
benefit for doing so.

It is a trade-off between that some benefit I am not seeing, and downside
that your version can be broken more easily by future changes to
child_handler(), because you are assuming more about what it happens to do
currently.

That's the kind of thing maintainers worry more about.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] git-daemon: make the signal handler almost a no-op
  2008-08-14  1:09       ` Junio C Hamano
@ 2008-08-14  7:47         ` Stephen R. van den Berg
  2008-08-14  8:26           ` Junio C Hamano
  0 siblings, 1 reply; 16+ messages in thread
From: Stephen R. van den Berg @ 2008-08-14  7:47 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Johannes Schindelin

Junio C Hamano wrote:
>"Stephen R. van den Berg" <srb@cuci.nl> writes:
>>>> -	signal(SIGCHLD, SIG_DFL);
>>>> +	child_handler(0);

>>>After all, we might even want to do something like:

>>>	static void child_handler(int signo)
>>>        {
>>>        	if (USE_SYSV_SIGNAL_SEMANTICS)
>>>                	signal(SIGCHLD, child_handler);

>> In return I ask: why?

>I agree it would be very meaningless change as an optimization, but I am
>concerned more about robustness and what makes sense.

Well, even if robustness and "the principle of least surprise" are
the prime concerns, my change could still be considered worthwhile, but
it depends on your viewpoint.

>Do you agree that "child_handler()" is a signal handler to handle SIGCHLD,
>and such a signal handler conceptually consists of two parts? i.e.

Yes.

>	static void child_handler()
>        {
>        	reap_dead_children();
>                rearm_signal_as_needed();

>Your argument is it is Ok to call this function when you are arming the
>signal for the first time, because reap_dead_children() happens to be
>empty, and your rearm_signal_as_needed() happens to be the same as
>arm_signal_always().

Well, not quite, that is part of the argument, the other parts are
implicit.

>Yes, it happens to be _Ok_ now.  But is it an improvement in the longer
>term?  I do not think so.

>I do not see why you think it is better to rely on these two assumptions
>than being explicit and say "we set up the signal for the first time
>always on any platform", especially when the latter is much more direct
>way to say what your intention is.

Renaming the function could do it.

>  Or are you gaining something by not
>explicitly calling signal() for the first time?  I may be missing some
>benefit for doing so.

Well, strictly speaking the benefits you're overlooking is:

 It centralises the spot where the systemcall is made to arm the
 handler.  This means that if the setup needed to arm the handler
 ever becomes more complicated in future OSes, it only needs to be
 updated in one place.  This is a direct maintainability benefit.

>It is a trade-off between that some benefit I am not seeing, and downside
>that your version can be broken more easily by future changes to
>child_handler(), because you are assuming more about what it happens to do
>currently.

>That's the kind of thing maintainers worry more about.

Well, I see two solutions which increase maintainability:

Solution A:
==================================
void child_handler(int signo) {
  signal(SIGCHLD, child_handler);	/* rearm always for portability */
}

main() {
  ...
  signal(SIGCHLD, child_handler);
  ...
}
==================================

Solution B:
==================================
void setup_child_handler(int signo) {
  signal(SIGCHLD, setup_child_handler);	/* rearm always for portability */
}

main() {
  ...
  setup_child_handler(0);
  ...
}
==================================

Solution C:
==================================
void setup_child_handler(void);

void child_handler(int signo) {
  setup_child_handler();		/* rearm always for portability */
}

void setup_child_handler(void) {
  signal(SIGCHLD, child_handler);
}

main() {
  ...
  setup_child_handler();
  ...
}
==================================

Solution A is what you propose, but which I find less appealing because
any future magic to actually setup the handler needs to be maintained
and updated in two places.

Solution C is what follows your train of thought better, because it
future-proofs the setup as well as the handler.

Solution B is what I consider most elegant and maintainable, because at
this point in time I cannot imagine what extra handling would be
required inside the handler which would require a setup as complicated
as solution C; so in order to keep it as simple as possible and
eliminate forward declarations and minimise systemcalls I suggest we
pick solution B until the need for solution C ever arises (I don't
think it ever will).

But, in any case, you're the maintainer here, not I, so it's your call.
I vote for B, but just tell me which solution you prefer and I'll adapt
the code?
-- 
Sincerely,
           Stephen R. van den Berg.

"Hold still, while I inject you with SQL."

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] git-daemon: make the signal handler almost a no-op
  2008-08-14  7:47         ` Stephen R. van den Berg
@ 2008-08-14  8:26           ` Junio C Hamano
  2008-08-14 10:13             ` Stephen R. van den Berg
  0 siblings, 1 reply; 16+ messages in thread
From: Junio C Hamano @ 2008-08-14  8:26 UTC (permalink / raw)
  To: Stephen R. van den Berg; +Cc: git, Johannes Schindelin

"Stephen R. van den Berg" <srb@cuci.nl> writes:

> Solution A is what you propose, but which I find less appealing because
> any future magic to actually setup the handler needs to be maintained
> and updated in two places.

Well, A is attractive because it leaves the window open for us to later
not rearming it unconditionally from child_handler().  I happen to think
that the interface we will use to call "signal()" is much less likely to
change than what we would do in child_handler().

> Solution C is what follows your train of thought better, because it
> future-proofs the setup as well as the handler.

Surely, we could take this route as the logical conclusion from my
maintainability concerns, except that, if we are to make things as fine
grained as you suggest, we would definitely will not have a single
"setup", but "setup" and "rearm" as separate functions.  Perhaps "setup"
may start using sigaction() with SA_RESETHAND cleared, and we can make
"rearm" truly a no-op everywhere.

 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] git-daemon: make the signal handler almost a no-op
  2008-08-14  8:26           ` Junio C Hamano
@ 2008-08-14 10:13             ` Stephen R. van den Berg
  0 siblings, 0 replies; 16+ messages in thread
From: Stephen R. van den Berg @ 2008-08-14 10:13 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Johannes Schindelin

Junio C Hamano wrote:
>"Stephen R. van den Berg" <srb@cuci.nl> writes:
>> Solution A is what you propose, but which I find less appealing because
>> any future magic to actually setup the handler needs to be maintained
>> and updated in two places.

>Well, A is attractive because it leaves the window open for us to later
>not rearming it unconditionally from child_handler().  I happen to think
>that the interface we will use to call "signal()" is much less likely to
>change than what we would do in child_handler().

Then that is the fundamental difference in maintainability-views.
I find it more likely that we change the interface to setup the signal
handler (in a future OS or by changing the way you invoke it), than 
that the actual work done by the signal handler is changed.

But since your view carries more weight here, I'll adapt the patch to
solution A.

>> Solution C is what follows your train of thought better, because it
>> future-proofs the setup as well as the handler.

>"setup", but "setup" and "rearm" as separate functions.  Perhaps "setup"
>may start using sigaction() with SA_RESETHAND cleared, and we can make
>"rearm" truly a no-op everywhere.

>From a portability standpoint using sigaction is worse than signal, and
rearming the handler even when it is not required is not a big deal.
-- 
Sincerely,
           Stephen R. van den Berg.

"Hold still, while I inject you with SQL."

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] git-daemon: make the signal handler almost a no-op
  2008-08-13  8:43 ` [PATCH 2/3] git-daemon: make the signal handler almost a no-op Stephen R. van den Berg
  2008-08-14  0:09   ` Junio C Hamano
@ 2008-08-14 13:41   ` Johannes Schindelin
  1 sibling, 0 replies; 16+ messages in thread
From: Johannes Schindelin @ 2008-08-14 13:41 UTC (permalink / raw)
  To: Stephen R. van den Berg; +Cc: git

Hi,

On Wed, 13 Aug 2008, Stephen R. van den Berg wrote:

> by exploiting the fact that systemcalls get interrupted by signals; and 
> even they aren't, all zombies will be collected before the next 
> accept(). Fix another error() -> logerror() call.

I find this commit message more than cryptic.  So much so that I would 
have to study the patch in detail to _understand_ what it does.

Not having the time, I refuse,
Dscho

P.S.: Oh, and I might have been a little appalled by the language of your 
commit messages.

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2008-08-14 13:37 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-08-13  8:43 [PATCH 1/3] git-daemon: logging done right Stephen R. van den Berg
2008-08-13  8:43 ` [PATCH 3/3] git-daemon: rewrite kindergarden Stephen R. van den Berg
2008-08-13  8:58   ` Stephen R. van den Berg
2008-08-13  9:00   ` [PATCH] " Stephen R. van den Berg
2008-08-13 10:40     ` [PATCH] git-daemon: rewrite kindergarden, new option --max-connections Stephen R. van den Berg
2008-08-13  9:05   ` [PATCH 3/3] git-daemon: rewrite kindergarden Petr Baudis
2008-08-13 10:37     ` Stephen R. van den Berg
2008-08-13  8:43 ` [PATCH 2/3] git-daemon: make the signal handler almost a no-op Stephen R. van den Berg
2008-08-14  0:09   ` Junio C Hamano
2008-08-14  0:18     ` Stephen R. van den Berg
2008-08-14  1:09       ` Junio C Hamano
2008-08-14  7:47         ` Stephen R. van den Berg
2008-08-14  8:26           ` Junio C Hamano
2008-08-14 10:13             ` Stephen R. van den Berg
2008-08-14 13:41   ` Johannes Schindelin
2008-08-13 23:13 ` [PATCH 1/3] git-daemon: logging done right Junio C Hamano

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).