linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* unregistered changes to the user<->kernel API
@ 2001-06-14 17:12 Andrea Arcangeli
  2001-06-14 17:16 ` Andrea Arcangeli
  2001-06-14 17:25 ` Jeff Garzik
  0 siblings, 2 replies; 15+ messages in thread
From: Andrea Arcangeli @ 2001-06-14 17:12 UTC (permalink / raw)
  To: Linus Torvalds, Ingo Molnar; +Cc: linux-kernel, Richard Henderson

There are a number of changes in the kernel API visible to userspace that
are unregistered in 2.4 mainline. I recommend merging them ASAP to
avoid generating collisions across different versions of the kernel.

I'll attach here a number of patches that should bring us back in
sync. They must be applied incrementally. (Really, the very last one is
mostly here for comments, not intended for merging in mainline.)

here the first that defines O_DIRECT (NOTE: the O_DIRECT value for alpha
is not definitive yet, O_DIRECTIO of tru64 is our O_NOFOLLOW so we're
just screwed as we just need a wrapper anyways to make complex programs like
dbms to run correctly without having to natively port them to linux,
02000000 in tru64 is O_DSYNC; maybe I should move it to 010000000
instead, which may be unused in tru64, but we would still have no
guarantee that it won't be used in the future. I was waiting for
Richard's comment about it).

The sparc64 values are approved by Dave.

diff -urN 2.4.6pre3/include/asm-alpha/fcntl.h o_direct/include/asm-alpha/fcntl.h
--- 2.4.6pre3/include/asm-alpha/fcntl.h	Thu Nov 16 15:37:42 2000
+++ o_direct/include/asm-alpha/fcntl.h	Thu Jun 14 17:34:56 2001
@@ -17,10 +17,10 @@
 #define O_NDELAY	O_NONBLOCK
 #define O_SYNC		040000
 #define FASYNC		020000	/* fcntl, for BSD compatibility */
-#define O_DIRECT	040000	/* direct disk access - should check with OSF/1 */
 #define O_DIRECTORY	0100000	/* must be a directory */
 #define O_NOFOLLOW	0200000 /* don't follow links */
 #define O_LARGEFILE	0400000 /* will be set by the kernel on every open */
+#define O_DIRECT	02000000 /* direct disk access - should check with OSF/1 */
 
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */
diff -urN 2.4.6pre3/include/asm-i386/fcntl.h o_direct/include/asm-i386/fcntl.h
--- 2.4.6pre3/include/asm-i386/fcntl.h	Thu Nov 16 15:37:33 2000
+++ o_direct/include/asm-i386/fcntl.h	Thu Jun 14 17:33:41 2001
@@ -16,7 +16,7 @@
 #define O_NDELAY	O_NONBLOCK
 #define O_SYNC		 010000
 #define FASYNC		 020000	/* fcntl, for BSD compatibility */
-#define O_DIRECT	 040000	/* direct disk access hint - currently ignored */
+#define O_DIRECT	 040000	/* direct disk access hint */
 #define O_LARGEFILE	0100000
 #define O_DIRECTORY	0200000	/* must be a directory */
 #define O_NOFOLLOW	0400000 /* don't follow links */
diff -urN 2.4.6pre3/include/asm-sparc/fcntl.h o_direct/include/asm-sparc/fcntl.h
--- 2.4.6pre3/include/asm-sparc/fcntl.h	Thu Nov 16 15:37:42 2000
+++ o_direct/include/asm-sparc/fcntl.h	Thu Jun 14 17:33:41 2001
@@ -20,6 +20,7 @@
 #define O_DIRECTORY	0x10000	/* must be a directory */
 #define O_NOFOLLOW	0x20000	/* don't follow links */
 #define O_LARGEFILE	0x40000
+#define O_DIRECT        0x100000 /* direct disk access hint */
 
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */
diff -urN 2.4.6pre3/include/asm-sparc64/fcntl.h o_direct/include/asm-sparc64/fcntl.h
--- 2.4.6pre3/include/asm-sparc64/fcntl.h	Thu Nov 16 15:37:42 2000
+++ o_direct/include/asm-sparc64/fcntl.h	Thu Jun 14 17:33:41 2001
@@ -20,6 +20,8 @@
 #define O_DIRECTORY	0x10000	/* must be a directory */
 #define O_NOFOLLOW	0x20000	/* don't follow links */
 #define O_LARGEFILE	0x40000
+#define O_DIRECT        0x100000 /* direct disk access hint */
+
 
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */


Here is the second patch, which defines PF_ATOMICALLOC (strictly speaking
this is not visible from userspace, but it also cleans up the
definitions a bit).

--- atomicalloc/include/linux/sched.h.~1~	Thu Apr 26 02:04:44 2001
+++ atomicalloc/include/linux/sched.h	Thu Apr 26 04:05:28 2001
@@ -403,18 +403,15 @@
 /*
  * Per process flags
  */
-#define PF_ALIGNWARN	0x00000001	/* Print alignment warning msgs */
-					/* Not implemented yet, only for 486*/
-#define PF_STARTING	0x00000002	/* being created */
-#define PF_EXITING	0x00000004	/* getting shut down */
-#define PF_FORKNOEXEC	0x00000040	/* forked but didn't exec */
-#define PF_SUPERPRIV	0x00000100	/* used super-user privileges */
-#define PF_DUMPCORE	0x00000200	/* dumped core */
-#define PF_SIGNALED	0x00000400	/* killed by a signal */
-#define PF_MEMALLOC	0x00000800	/* Allocating memory */
-#define PF_VFORK	0x00001000	/* Wake up parent in mm_release */
-
-#define PF_USEDFPU	0x00100000	/* task used FPU this quantum (SMP) */
+#define PF_EXITING	(1UL<<0)	/* getting shut down */
+#define PF_FORKNOEXEC	(1UL<<1)	/* forked but didn't exec */
+#define PF_SUPERPRIV	(1UL<<2)	/* used super-user privileges */
+#define PF_DUMPCORE	(1UL<<3)	/* dumped core */
+#define PF_SIGNALED	(1UL<<4)	/* killed by a signal */
+#define PF_MEMALLOC	(1UL<<5)	/* Allocating memory */
+#define PF_VFORK	(1UL<<6)	/* Wake up parent in mm_release */
+#define PF_USEDFPU	(1UL<<7)	/* task used FPU this quantum (SMP) */
+#define PF_ATOMICALLOC	(1UL<<8)	/* do not block during memalloc */
 
 /*
  * Ptrace flags


Here the third, it registers the tux syscall at for the alpha so other
people won't use such same syscall for something else (I didn't remove
the #ifdefs since they don't hurt as they're undefined in mainline).

diff -urN ref/arch/alpha/kernel/entry.S tuxsys/arch/alpha/kernel/entry.S
--- ref/arch/alpha/kernel/entry.S	Sat Apr 28 18:37:45 2001
+++ tuxsys/arch/alpha/kernel/entry.S	Sun Apr 29 17:52:44 2001
@@ -1004,7 +1004,15 @@
 	.quad alpha_ni_syscall
 	.quad alpha_ni_syscall			/* 220 */
 	.quad alpha_ni_syscall
+#ifdef CONFIG_TUX
+	.quad __sys_tux
+#else
+# ifdef CONFIG_TUX_MODULE
+	.quad sys_tux
+# else
 	.quad alpha_ni_syscall
+# endif
+#endif
 	.quad alpha_ni_syscall
 	.quad alpha_ni_syscall
 	.quad alpha_ni_syscall			/* 225 */
diff -urN ref/arch/i386/kernel/entry.S tuxsys/arch/i386/kernel/entry.S
--- ref/arch/i386/kernel/entry.S	Sun Apr 29 17:00:20 2001
+++ tuxsys/arch/i386/kernel/entry.S	Sun Apr 29 17:53:36 2001
@@ -645,7 +645,15 @@
 	.long SYMBOL_NAME(sys_madvise)
 	.long SYMBOL_NAME(sys_getdents64)	/* 220 */
 	.long SYMBOL_NAME(sys_fcntl64)
+#ifdef CONFIG_TUX
+	.long SYMBOL_NAME(__sys_tux)
+#else
+# ifdef CONFIG_TUX_MODULE
+	.long SYMBOL_NAME(sys_tux)
+# else
 	.long SYMBOL_NAME(sys_ni_syscall)	/* reserved for TUX */
+# endif
+#endif
 
 	.rept NR_syscalls-(.-sys_call_table)/4
 		.long SYMBOL_NAME(sys_ni_syscall)


Here is the fourth; it defines O_ATOMICLOOKUP and EWOULDBLOCKIO for the
non-blocking dcache and pagecache lookups (LOOKUP_ATOMIC isn't
visible from userspace, but I defined it while I was there; if you want,
you can drop the include/linux/fs.h part of the patch):

diff -urN ref/include/asm-alpha/fcntl.h atomiclookup/include/asm-alpha/fcntl.h
--- ref/include/asm-alpha/fcntl.h	Thu Jun 14 17:46:45 2001
+++ atomiclookup/include/asm-alpha/fcntl.h	Thu Jun 14 17:47:18 2001
@@ -20,6 +20,7 @@
 #define O_DIRECTORY	0100000	/* must be a directory */
 #define O_NOFOLLOW	0200000 /* don't follow links */
 #define O_LARGEFILE	0400000 /* will be set by the kernel on every open */
+#define O_ATOMICLOOKUP  01000000 /* do atomic file lookup */
 #define O_DIRECT	02000000 /* direct disk access - should check with OSF/1 */
 
 #define F_DUPFD		0	/* dup */
diff -urN ref/include/asm-i386/fcntl.h atomiclookup/include/asm-i386/fcntl.h
--- ref/include/asm-i386/fcntl.h	Thu Jun 14 17:46:45 2001
+++ atomiclookup/include/asm-i386/fcntl.h	Thu Jun 14 17:47:01 2001
@@ -20,6 +20,7 @@
 #define O_LARGEFILE	0100000
 #define O_DIRECTORY	0200000	/* must be a directory */
 #define O_NOFOLLOW	0400000 /* don't follow links */
+#define O_ATOMICLOOKUP	01000000 /* do atomic file lookup */
 
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */
diff -urN ref/include/asm-ia64/fcntl.h atomiclookup/include/asm-ia64/fcntl.h
--- ref/include/asm-ia64/fcntl.h	Thu Nov 16 15:37:42 2000
+++ atomiclookup/include/asm-ia64/fcntl.h	Thu Jun 14 17:47:01 2001
@@ -28,6 +28,7 @@
 #define O_LARGEFILE	0100000
 #define O_DIRECTORY	0200000	/* must be a directory */
 #define O_NOFOLLOW	0400000 /* don't follow links */
+#define O_ATOMICLOOKUP  01000000 /* do atomic file lookup */
 
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */
diff -urN ref/include/asm-sparc/fcntl.h atomiclookup/include/asm-sparc/fcntl.h
--- ref/include/asm-sparc/fcntl.h	Thu Jun 14 17:46:45 2001
+++ atomiclookup/include/asm-sparc/fcntl.h	Thu Jun 14 17:47:01 2001
@@ -20,6 +20,7 @@
 #define O_DIRECTORY	0x10000	/* must be a directory */
 #define O_NOFOLLOW	0x20000	/* don't follow links */
 #define O_LARGEFILE	0x40000
+#define O_ATOMICLOOKUP  0x80000 /* do atomic file lookup */
 #define O_DIRECT        0x100000 /* direct disk access hint */
 
 #define F_DUPFD		0	/* dup */
diff -urN ref/include/asm-sparc64/fcntl.h atomiclookup/include/asm-sparc64/fcntl.h
--- ref/include/asm-sparc64/fcntl.h	Thu Jun 14 17:46:45 2001
+++ atomiclookup/include/asm-sparc64/fcntl.h	Thu Jun 14 17:47:01 2001
@@ -20,6 +20,7 @@
 #define O_DIRECTORY	0x10000	/* must be a directory */
 #define O_NOFOLLOW	0x20000	/* don't follow links */
 #define O_LARGEFILE	0x40000
+#define O_ATOMICLOOKUP  0x80000 /* do atomic file lookup */
 #define O_DIRECT        0x100000 /* direct disk access hint */
 
 
diff -urN ref/include/linux/errno.h atomiclookup/include/linux/errno.h
--- ref/include/linux/errno.h	Fri Feb 23 21:20:14 2001
+++ atomiclookup/include/linux/errno.h	Thu Jun 14 17:47:01 2001
@@ -21,6 +21,9 @@
 #define EBADTYPE	527	/* Type not supported by server */
 #define EJUKEBOX	528	/* Request initiated, but will not complete before timeout */
 
+/* Defined for TUX async IO */
+#define EWOULDBLOCKIO	530	/* Would block due to block-IO */
+
 #endif
 
 #endif
diff -urN ref/include/linux/fs.h atomiclookup/include/linux/fs.h
--- ref/include/linux/fs.h	Thu Jun 14 17:46:45 2001
+++ atomiclookup/include/linux/fs.h	Thu Jun 14 17:47:01 2001
@@ -1227,6 +1227,7 @@
 #define LOOKUP_POSITIVE		(8)
 #define LOOKUP_PARENT		(16)
 #define LOOKUP_NOALT		(32)
+#define LOOKUP_ATOMIC		(64)
 /*
  * Type of the last component on LOOKUP_PARENT
  */

Here the fifth, this defines the tux sysctl numbers (OTOH the sysctl by
number gets broken all the time and nobody should use sysctl by number
with new sysctls anyways).

diff -urN 2.4.5pre5/include/linux/sysctl.h tux-sysctl/include/linux/sysctl.h
--- 2.4.5pre5/include/linux/sysctl.h	Tue May 22 22:04:27 2001
+++ tux-sysctl/include/linux/sysctl.h	Wed May 23 19:20:48 2001
@@ -157,7 +157,8 @@
 	NET_TR=14,
 	NET_DECNET=15,
 	NET_ECONET=16,
-	NET_KHTTPD=17
+	NET_KHTTPD=17,
+	NET_TUX=18
 };
 
 /* /proc/sys/kernel/random */
@@ -471,6 +472,55 @@
 	NET_DECNET_DST_GC_INTERVAL = 9,
 	NET_DECNET_CONF = 10,
 	NET_DECNET_DEBUG_LEVEL = 255
+};
+
+/* /proc/sys/net/tux/ */
+enum {
+	NET_TUX_DOCROOT			=  1,
+	NET_TUX_LOGFILE			=  2,
+	NET_TUX_EXTCGI			=  3,
+	NET_TUX_STOP			=  4,
+	NET_TUX_CLIENTPORT		=  5,
+	NET_TUX_LOGGING			=  6,
+	NET_TUX_SERVERPORT		=  7,
+	NET_TUX_THREADS			=  8,
+	NET_TUX_KEEPALIVE_TIMEOUT	=  9,
+	NET_TUX_MAX_KEEPALIVE_BW	= 10,
+	NET_TUX_DEFER_ACCEPT		= 11,
+	NET_TUX_MAX_FREE_REQUESTS	= 12,
+	NET_TUX_MAX_CONNECT		= 13,
+	NET_TUX_MAX_BACKLOG		= 14,
+	NET_TUX_MODE_FORBIDDEN		= 15,
+	NET_TUX_MODE_ALLOWED		= 16,
+	NET_TUX_MODE_USERSPACE		= 17,
+	NET_TUX_MODE_CGI		= 18,
+	NET_TUX_CGI_UID			= 19,
+	NET_TUX_CGI_GID			= 20,
+	NET_TUX_CGIROOT			= 21,
+	NET_TUX_LOGENTRY_ALIGN_ORDER	= 22,
+	NET_TUX_NONAGLE			= 23,
+	NET_TUX_ACK_PINGPONG		= 24,
+	NET_TUX_PUSH_ALL		= 25,
+	NET_TUX_ZEROCOPY_PARSE		= 26,
+	NET_CONFIG_TUX_DEBUG_BLOCKING	= 27,
+	NET_TUX_PAGE_AGE_START		= 28,
+	NET_TUX_PAGE_AGE_ADV		= 29,
+	NET_TUX_PAGE_AGE_MAX		= 30,
+	NET_TUX_VIRTUAL_SERVER		= 31,
+	NET_TUX_MAX_OBJECT_SIZE		= 32,
+	NET_TUX_COMPRESSION		= 33,
+	NET_TUX_NOID			= 34,
+	NET_TUX_CGI_INHERIT_CPU		= 35,
+	NET_TUX_CGI_CPU_MASK		= 36,
+	NET_TUX_ZEROCOPY_HEADER		= 37,
+	NET_TUX_ZEROCOPY_SENDFILE	= 38,
+	NET_TUX_ALL_USERSPACE		= 39,
+	NET_TUX_REDIRECT_LOGGING	= 40,
+	NET_TUX_REFERER_LOGGING		= 41,
+	NET_TUX_MAX_HEADER_LEN		= 42,
+	NET_TUX_404_PAGE		= 43,
+	NET_TUX_APPLICATION_PROTOCOL	= 44,
+	NET_TUX_MAX_KEEPALIVES		= 45,
 };
 
 /* /proc/sys/net/khttpd/ */


This last one becomes visible in /proc/stat and I definitely hate it. It
really should be put somewhere else; it doesn't belong in /proc/stat, so
I'd vote to change tux to put it in a tux-specific directory, which is
of course already present (but for now I'll keep it in my tree to avoid
generating userspace incompatibilities).

diff -urN 2.4.5pre5/fs/proc/proc_misc.c tux-kstat/fs/proc/proc_misc.c
--- 2.4.5pre5/fs/proc/proc_misc.c	Tue May  1 19:35:29 2001
+++ tux-kstat/fs/proc/proc_misc.c	Wed May 23 19:07:26 2001
@@ -259,6 +259,66 @@
 }
 #endif
 
+
+/*
+ * print out TUX internal statistics into /proc/stat.
+ * (Most of them are not maintained if CONFIG_TUX_DEBUG is off.)
+ */
+
+static int print_tux_procinfo (char *page)
+{
+	unsigned int len = 0, i;
+
+#define P(x) \
+	do { len += sprintf(page + len, #x ": %u\n", x); } while(0)
+
+	P(kstat.input_fastpath);
+	P(kstat.input_slowpath);
+	P(kstat.inputqueue_got_packet);
+	P(kstat.inputqueue_no_packet);
+	P(kstat.nr_keepalive_optimized);
+	P(kstat.parse_static_incomplete);
+	P(kstat.parse_static_redirect);
+	P(kstat.parse_static_cachemiss);
+	P(kstat.parse_static_nooutput);
+	P(kstat.parse_static_normal);
+	P(kstat.parse_dynamic_incomplete);
+	P(kstat.parse_dynamic_redirect);
+	P(kstat.parse_dynamic_cachemiss);
+	P(kstat.parse_dynamic_nooutput);
+	P(kstat.parse_dynamic_normal);
+	P(kstat.complete_parsing);
+	P(kstat.nr_free_pending);
+	P(kstat.nr_allocated);
+	P(kstat.nr_idle_input_pending);
+	P(kstat.nr_output_space_pending);
+	P(kstat.nr_input_pending);
+	P(kstat.nr_cachemiss_pending);
+	P(kstat.nr_secondary_pending);
+	P(kstat.nr_output_pending);
+	P(kstat.nr_redirect_pending);
+	P(kstat.nr_finish_pending);
+	P(kstat.nr_userspace_pending);
+	P(kstat.nr_postpone_pending);
+	P(kstat.static_lookup_cachemisses);
+	P(kstat.static_sendfile_cachemisses);
+	P(kstat.user_lookup_cachemisses);
+	P(kstat.user_fetch_cachemisses);
+	P(kstat.user_sendobject_cachemisses);
+	P(kstat.user_sendobject_write_misses);
+	P(kstat.nr_keepalive_reqs);
+	P(kstat.nr_nonkeepalive_reqs);
+
+	len += sprintf(page + len, "keephist: ");
+	for (i = 0; i < KEEPALIVE_HIST_SIZE; i++)
+		if (kstat.keepalive_hist[i])
+			len += sprintf(page + len, "%d(%d) ",
+					i, kstat.keepalive_hist[i]);
+	len += sprintf(page + len, "\n");
+#undef P
+
+	return len;
+}
 static int kstat_read_proc(char *page, char **start, off_t off,
 				 int count, int *eof, void *data)
 {
@@ -333,6 +393,8 @@
 		kstat.context_swtch,
 		xtime.tv_sec - jif / HZ,
 		total_forks);
+
+	len += print_tux_procinfo(page+len);
 
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
diff -urN 2.4.5pre5/include/linux/kernel_stat.h tux-kstat/include/linux/kernel_stat.h
--- 2.4.5pre5/include/linux/kernel_stat.h	Tue May 15 21:40:17 2001
+++ tux-kstat/include/linux/kernel_stat.h	Wed May 23 19:06:38 2001
@@ -33,6 +33,53 @@
 	unsigned int ierrors, oerrors;
 	unsigned int collisions;
 	unsigned int context_swtch;
+	unsigned int context_swtch_cross;
+	unsigned int nr_free_pending;
+	unsigned int nr_allocated;
+	unsigned int nr_idle_input_pending;
+	unsigned int nr_output_space_pending;
+	unsigned int nr_work_pending;
+	unsigned int nr_input_pending;
+	unsigned int nr_cachemiss_pending;
+	unsigned int nr_secondary_pending;
+	unsigned int nr_output_pending;
+	unsigned int nr_redirect_pending;
+	unsigned int nr_postpone_pending;
+	unsigned int nr_finish_pending;
+	unsigned int nr_userspace_pending;
+	unsigned int static_lookup_cachemisses;
+	unsigned int static_sendfile_cachemisses;
+	unsigned int user_lookup_cachemisses;
+	unsigned int user_fetch_cachemisses;
+	unsigned int user_sendobject_cachemisses;
+	unsigned int user_sendobject_write_misses;
+	unsigned int user_sendbuf_cachemisses;
+	unsigned int user_sendbuf_write_misses;
+#define URL_HIST_SIZE 1000
+	unsigned int url_hist_hits[URL_HIST_SIZE];
+	unsigned int url_hist_misses[URL_HIST_SIZE];
+	unsigned int input_fastpath;
+	unsigned int input_slowpath;
+	unsigned int inputqueue_got_packet;
+	unsigned int inputqueue_no_packet;
+	unsigned int nr_keepalive_optimized;
+
+	unsigned int parse_static_incomplete;
+	unsigned int parse_static_redirect;
+	unsigned int parse_static_cachemiss;
+	unsigned int parse_static_nooutput;
+	unsigned int parse_static_normal;
+	unsigned int parse_dynamic_incomplete;
+	unsigned int parse_dynamic_redirect;
+	unsigned int parse_dynamic_cachemiss;
+	unsigned int parse_dynamic_nooutput;
+	unsigned int parse_dynamic_normal;
+	unsigned int complete_parsing;
+
+	unsigned int nr_keepalive_reqs;
+	unsigned int nr_nonkeepalive_reqs;
+#define KEEPALIVE_HIST_SIZE 100
+	unsigned int keepalive_hist[KEEPALIVE_HIST_SIZE];
 };
 
 extern struct kernel_stat kstat;


Andrea

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: unregistered changes to the user<->kernel API
  2001-06-14 17:12 unregistered changes to the user<->kernel API Andrea Arcangeli
@ 2001-06-14 17:16 ` Andrea Arcangeli
  2001-06-14 17:21   ` Andrea Arcangeli
  2001-06-14 17:25 ` Jeff Garzik
  1 sibling, 1 reply; 15+ messages in thread
From: Andrea Arcangeli @ 2001-06-14 17:16 UTC (permalink / raw)
  To: Linus Torvalds, Ingo Molnar; +Cc: linux-kernel, Richard Henderson

On Thu, Jun 14, 2001 at 07:12:19PM +0200, Andrea Arcangeli wrote:
> is not definitive yet, O_DIRECTIO of tru64 is our O_NOFOLLOW so we're
> just screwed as we just need a wrapper anyways to make complex programs like

I just got the email from Richard that he prefers to break O_NOFOLLOW
than to define O_DIRECT to something other than 0200000.  So there will
probably be an incremental patch for the alpha later, to apply on top
of the previous ones.

Also, folks, please remember never to choose random numbers for the alpha
userspace-visible kernel API.

Andrea

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: unregistered changes to the user<->kernel API
  2001-06-14 17:16 ` Andrea Arcangeli
@ 2001-06-14 17:21   ` Andrea Arcangeli
  2001-06-14 17:32     ` Richard Henderson
  0 siblings, 1 reply; 15+ messages in thread
From: Andrea Arcangeli @ 2001-06-14 17:21 UTC (permalink / raw)
  To: Linus Torvalds, Ingo Molnar; +Cc: linux-kernel, Richard Henderson

On Thu, Jun 14, 2001 at 07:16:34PM +0200, Andrea Arcangeli wrote:
> I just got the email from Richard that he prefers to break O_NOFOLLOW

Richard are you sure we can break O_NOFOLLOW and still expect the machine to
boot?

./elf/cache.c:  fd = open (temp_name, O_CREAT|O_WRONLY|O_TRUNC|O_NOFOLLOW,
./elf/dl-profile.c:#ifdef O_NOFOLLOW
./elf/dl-profile.c:# define EXTRA_FLAGS | O_NOFOLLOW
./elf/rtld.c:#ifdef O_NOFOLLOW
./elf/rtld.c:      const int flags = O_WRONLY | O_APPEND | O_CREAT | O_NOFOLLOW;
./include/asm/fcntl.h:#define O_NOFOLLOW        0400000 /* don't follow links */
./sysdeps/generic/check_fds.c:     the O_NOFOLLOW flag for open() but only on some system.  */
./sysdeps/generic/check_fds.c:#ifndef O_NOFOLLOW
./sysdeps/generic/check_fds.c:# define O_NOFOLLOW       0
./sysdeps/generic/check_fds.c:  check_one_fd (STDIN_FILENO, O_RDONLY | O_NOFOLLOW);
./sysdeps/generic/check_fds.c:  check_one_fd (STDOUT_FILENO, O_RDWR | O_NOFOLLOW);
./sysdeps/generic/check_fds.c:  check_one_fd (STDERR_FILENO, O_RDWR | O_NOFOLLOW);
./sysdeps/unix/sysv/linux/alpha/bits/fcntl.h:# define O_NOFOLLOW        0200000 /* Do not follow links.  */
./sysdeps/unix/sysv/linux/shm_open.c:  fd = open (fname, oflag | O_NOFOLLOW, mode);

Andrea

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: unregistered changes to the user<->kernel API
  2001-06-14 17:12 unregistered changes to the user<->kernel API Andrea Arcangeli
  2001-06-14 17:16 ` Andrea Arcangeli
@ 2001-06-14 17:25 ` Jeff Garzik
  2001-06-14 17:44   ` Andrea Arcangeli
  1 sibling, 1 reply; 15+ messages in thread
From: Jeff Garzik @ 2001-06-14 17:25 UTC (permalink / raw)
  To: Andrea Arcangeli
  Cc: Linus Torvalds, Ingo Molnar, linux-kernel, Richard Henderson

Andrea Arcangeli wrote:
> Here the third, it registers the tux syscall at for the alpha so other
> people won't use such same syscall for something else (I didn't remove
> the #ifdefs since they don't hurt as they're undefined in mainline).
> 
> diff -urN ref/arch/alpha/kernel/entry.S tuxsys/arch/alpha/kernel/entry.S
> --- ref/arch/alpha/kernel/entry.S       Sat Apr 28 18:37:45 2001
> +++ tuxsys/arch/alpha/kernel/entry.S    Sun Apr 29 17:52:44 2001
> @@ -1004,7 +1004,15 @@
>         .quad alpha_ni_syscall
>         .quad alpha_ni_syscall                  /* 220 */
>         .quad alpha_ni_syscall
> +#ifdef CONFIG_TUX
> +       .quad __sys_tux
> +#else
> +# ifdef CONFIG_TUX_MODULE
> +       .quad sys_tux
> +# else
>         .quad alpha_ni_syscall
> +# endif
> +#endif

They don't hurt but it's also a bad precedent - you don't want to add a
ton of CONFIG_xxx to the Linus tree for stuff outside the Linus tree. 
disagree with this patch.

> Here the fifth, this defines the tux sysctl numbers (OTOH the sysctl by
> number gets broken all the time and nobody should use sysctl by number
> with new sysctls anyways).
> 
> diff -urN 2.4.5pre5/include/linux/sysctl.h tux-sysctl/include/linux/sysctl.h
> --- 2.4.5pre5/include/linux/sysctl.h    Tue May 22 22:04:27 2001
> +++ tux-sysctl/include/linux/sysctl.h   Wed May 23 19:20:48 2001
> @@ -157,7 +157,8 @@
>         NET_TR=14,
>         NET_DECNET=15,
>         NET_ECONET=16,
> -       NET_KHTTPD=17
> +       NET_KHTTPD=17,
> +       NET_TUX=18
>  };

ok

> +/* /proc/sys/net/tux/ */
> +enum {
> +       NET_TUX_DOCROOT                 =  1,
> +       NET_TUX_LOGFILE                 =  2,

this conflicts with noone, so can wait for tux patch


> diff -urN 2.4.5pre5/include/linux/kernel_stat.h tux-kstat/include/linux/kernel_stat.h
> --- 2.4.5pre5/include/linux/kernel_stat.h       Tue May 15 21:40:17 2001
> +++ tux-kstat/include/linux/kernel_stat.h       Wed May 23 19:06:38 2001
> @@ -33,6 +33,53 @@
>         unsigned int ierrors, oerrors;
>         unsigned int collisions;
>         unsigned int context_swtch;
> +       unsigned int context_swtch_cross;
> +       unsigned int nr_free_pending;
> +       unsigned int nr_allocated;
> +       unsigned int nr_idle_input_pending;
> +       unsigned int nr_output_space_pending;
> +       unsigned int nr_work_pending;
> +       unsigned int nr_input_pending;
> +       unsigned int nr_cachemiss_pending;
> +       unsigned int nr_secondary_pending;
> +       unsigned int nr_output_pending;
> +       unsigned int nr_redirect_pending;
> +       unsigned int nr_postpone_pending;
> +       unsigned int nr_finish_pending;
> +       unsigned int nr_userspace_pending;
> +       unsigned int static_lookup_cachemisses;
> +       unsigned int static_sendfile_cachemisses;
> +       unsigned int user_lookup_cachemisses;
> +       unsigned int user_fetch_cachemisses;
> +       unsigned int user_sendobject_cachemisses;
> +       unsigned int user_sendobject_write_misses;
> +       unsigned int user_sendbuf_cachemisses;
> +       unsigned int user_sendbuf_write_misses;
> +#define URL_HIST_SIZE 1000
> +       unsigned int url_hist_hits[URL_HIST_SIZE];
> +       unsigned int url_hist_misses[URL_HIST_SIZE];
> +       unsigned int input_fastpath;
> +       unsigned int input_slowpath;
> +       unsigned int inputqueue_got_packet;
> +       unsigned int inputqueue_no_packet;
> +       unsigned int nr_keepalive_optimized;
> +
> +       unsigned int parse_static_incomplete;
> +       unsigned int parse_static_redirect;
> +       unsigned int parse_static_cachemiss;
> +       unsigned int parse_static_nooutput;
> +       unsigned int parse_static_normal;
> +       unsigned int parse_dynamic_incomplete;
> +       unsigned int parse_dynamic_redirect;
> +       unsigned int parse_dynamic_cachemiss;
> +       unsigned int parse_dynamic_nooutput;
> +       unsigned int parse_dynamic_normal;
> +       unsigned int complete_parsing;
> +
> +       unsigned int nr_keepalive_reqs;
> +       unsigned int nr_nonkeepalive_reqs;
> +#define KEEPALIVE_HIST_SIZE 100
> +       unsigned int keepalive_hist[KEEPALIVE_HIST_SIZE];
>  };

ouch!   I would understand if this was inside CONFIG_TUX, but even so I
would disagree until Tux is merged.

-- 
Jeff Garzik      | Andre the Giant has a posse.
Building 1024    |
MandrakeSoft     |

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: unregistered changes to the user<->kernel API
  2001-06-14 17:21   ` Andrea Arcangeli
@ 2001-06-14 17:32     ` Richard Henderson
  2001-06-14 17:47       ` Andrea Arcangeli
  2001-06-14 18:10       ` Alexander Viro
  0 siblings, 2 replies; 15+ messages in thread
From: Richard Henderson @ 2001-06-14 17:32 UTC (permalink / raw)
  To: Andrea Arcangeli; +Cc: Linus Torvalds, Ingo Molnar, linux-kernel

On Thu, Jun 14, 2001 at 07:21:22PM +0200, Andrea Arcangeli wrote:
> Richard are you sure we can break O_NOFOLLOW and still expect the machine to
> boot?
[uses in glibc]

Yes, I saw those.  What is the effect of O_NOFOLLOW?  To not
follow symbolic links when opening the file.  If you open a
regular file, in effect nothing happens.  Moreover, if these
opens were not finding files now, the system wouldn't work.

So: the effect, I suppose, is (1) disabling some security
within glibc, and (2) making these accesses slower since they
will be considered O_DIRECT after the change.

Which doesn't seem that life-threatening to me.


r~

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: unregistered changes to the user<->kernel API
  2001-06-14 17:25 ` Jeff Garzik
@ 2001-06-14 17:44   ` Andrea Arcangeli
  2001-06-14 17:52     ` Jeff Garzik
  0 siblings, 1 reply; 15+ messages in thread
From: Andrea Arcangeli @ 2001-06-14 17:44 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Linus Torvalds, Ingo Molnar, linux-kernel, Richard Henderson

On Thu, Jun 14, 2001 at 01:25:10PM -0400, Jeff Garzik wrote:
> They don't hurt but it's also a bad precedent - you don't want to add a
> ton of CONFIG_xxx to the Linus tree for stuff outside the Linus tree. 
> disagree with this patch.

If tux will ever be merged into mainline eventually I don't think
there's a value in defer such bit. Of course if tux will never get
merged then I totally agree with you.

> this conflicts with noone, so can wait for tux patch

same as above.

> ouch!   I would understand if this was inside CONFIG_TUX, but even so I
> would disagree until Tux is merged.

Then you may prefer to wait for tux to be merged before merging the rest
as well; in the meantime 90% of the kernels running out there will show
such stuff in /proc/stat (hopefully "the same stuff", which is why
I'm posting those patches in the first place).

Andrea

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: unregistered changes to the user<->kernel API
  2001-06-14 17:32     ` Richard Henderson
@ 2001-06-14 17:47       ` Andrea Arcangeli
  2001-06-14 18:16         ` Andrea Arcangeli
  2001-06-14 18:17         ` Richard Henderson
  2001-06-14 18:10       ` Alexander Viro
  1 sibling, 2 replies; 15+ messages in thread
From: Andrea Arcangeli @ 2001-06-14 17:47 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Linus Torvalds, Ingo Molnar, linux-kernel

On Thu, Jun 14, 2001 at 10:32:49AM -0700, Richard Henderson wrote:
> within glibc, and (2) making these accesses slower since they
> will be considered O_DIRECT after the change.

and then read/write will return -EINVAL which is life-threatening.
O_DIRECT like rawio via /dev/raw imposes special buffer size and
alignment (size multiple of softblocksize of the fs and softblocksize
alignment, at max I can turn it down to hardblocksize without intensive
changes and guaranteeing zerocopy [modulo bounce buffers on x86 of
course]).

So in short at least glibc would need to be replaced...

Andrea

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: unregistered changes to the user<->kernel API
  2001-06-14 17:44   ` Andrea Arcangeli
@ 2001-06-14 17:52     ` Jeff Garzik
  2001-06-14 18:03       ` Andrea Arcangeli
  0 siblings, 1 reply; 15+ messages in thread
From: Jeff Garzik @ 2001-06-14 17:52 UTC (permalink / raw)
  To: Andrea Arcangeli
  Cc: Linus Torvalds, Ingo Molnar, linux-kernel, Richard Henderson

Andrea Arcangeli wrote:
> 
> On Thu, Jun 14, 2001 at 01:25:10PM -0400, Jeff Garzik wrote:
> > They don't hurt but it's also a bad precedent - you don't want to add a
> > ton of CONFIG_xxx to the Linus tree for stuff outside the Linus tree.
> > disagree with this patch.
> 
> If tux will ever be merged into mainline eventually I don't think
> there's a value in defer such bit. Of course if tux will never get
> merged then I totally agree with you.

You're missing the point -- it's a bad precedent.

How many kernel forks and patches exist out there on the net?

Many of these patches will get merged eventually.  But it is a bad idea
to include bits of such into the Linus tree, when they are not used in
the Linus tree.

-Exceptions- to this policy should be carefully considered...  reserving
syscall and sysctl numbers certainly makes sense.  Bloating kernel_stat
with tons of unused numbers, some specific to web servers AFAICS, does
not make sense.

Tangent:  Why is this webserver-specific crap in kernel_stat anyway?  It
looks like there should be a separate per-cpu structure for webserver
statistics.

> +       unsigned int parse_static_incomplete;
> +       unsigned int parse_static_redirect;
> +       unsigned int parse_static_cachemiss;
> +       unsigned int parse_static_nooutput;
> +       unsigned int parse_static_normal;
> +       unsigned int parse_dynamic_incomplete;
> +       unsigned int parse_dynamic_redirect;
> +       unsigned int parse_dynamic_cachemiss;
> +       unsigned int parse_dynamic_nooutput;
> +       unsigned int parse_dynamic_normal;
> +       unsigned int complete_parsing;
> +
> +       unsigned int nr_keepalive_reqs;
> +       unsigned int nr_nonkeepalive_reqs;
> +#define KEEPALIVE_HIST_SIZE 100
> +       unsigned int keepalive_hist[KEEPALIVE_HIST_SIZE];

Even when merging Tux, I would hope Linus would not apply this
particular change.

	Jeff


-- 
Jeff Garzik      | Andre the Giant has a posse.
Building 1024    |
MandrakeSoft     |

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: unregistered changes to the user<->kernel API
  2001-06-14 17:52     ` Jeff Garzik
@ 2001-06-14 18:03       ` Andrea Arcangeli
  2001-06-14 18:11         ` Alan Cox
  0 siblings, 1 reply; 15+ messages in thread
From: Andrea Arcangeli @ 2001-06-14 18:03 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Linus Torvalds, Ingo Molnar, linux-kernel, Richard Henderson

On Thu, Jun 14, 2001 at 01:52:44PM -0400, Jeff Garzik wrote:
> You're missing the point -- it's a bad precedent.
> 
> How many kernel forks and patches exist out there on the net?

How many of them are applied to 90% of kernels running out there? How
many of them will get merged eventually? How many of them make
modifications to the kernel that are visible to userspace in any
possible configuration of the kernel?

> Tangent:  Why is this webserver-specific crap in kernel_stat anyway?  It
> Even when merging Tux, I would hope Linus would not apply this
> particular change.

Indeed, I also said this in my first email :)

Andrea

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: unregistered changes to the user<->kernel API
  2001-06-14 17:32     ` Richard Henderson
  2001-06-14 17:47       ` Andrea Arcangeli
@ 2001-06-14 18:10       ` Alexander Viro
  1 sibling, 0 replies; 15+ messages in thread
From: Alexander Viro @ 2001-06-14 18:10 UTC (permalink / raw)
  To: Richard Henderson
  Cc: Andrea Arcangeli, Linus Torvalds, Ingo Molnar, linux-kernel



On Thu, 14 Jun 2001, Richard Henderson wrote:

> Yes, I saw those.  What is the effect of O_NOFOLLOW?  To not
> follow symbolic links when opening the file.  If you open a
> regular file, in effect nothing happens.  Moreover, if these
> opens were not finding files now, the system wouldn't work.
> 
> So: the effect, I suppose, is (1) disabling some security
> within glibc, and (2) making these accesses slower since they
> will be considered O_DIRECT after the change.
> 
> Which doesn't seem that life-threatening to me.

O_NOFOLLOW is used to deal with symlink attacks. Breaking it means
that for quite a few binaries you are opening security holes. And
since it's a flagday change, you'll get the situation when no version
will work for all kernels. Bad idea, IMO.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: unregistered changes to the user<->kernel API
  2001-06-14 18:03       ` Andrea Arcangeli
@ 2001-06-14 18:11         ` Alan Cox
  2001-06-14 18:27           ` Andrea Arcangeli
  2001-06-14 21:43           ` Albert D. Cahalan
  0 siblings, 2 replies; 15+ messages in thread
From: Alan Cox @ 2001-06-14 18:11 UTC (permalink / raw)
  To: Andrea Arcangeli
  Cc: Jeff Garzik, Linus Torvalds, Ingo Molnar, linux-kernel,
	Richard Henderson

> > Tangent:  Why is this webserver-specific crap in kernel_stat anyway?  It
> > Even when merging Tux, I would hope Linus would not apply this
> > particular change.
> 
> Indeed, I also said this in my first email :)

I dont see why Tux should be merged. If we have people achieving the same
performance in user space with the core facilities tux added to the kernel
like the better irq/sendfile stuff why bother merging tux ?

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: unregistered changes to the user<->kernel API
  2001-06-14 17:47       ` Andrea Arcangeli
@ 2001-06-14 18:16         ` Andrea Arcangeli
  2001-06-14 18:17         ` Richard Henderson
  1 sibling, 0 replies; 15+ messages in thread
From: Andrea Arcangeli @ 2001-06-14 18:16 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Linus Torvalds, Ingo Molnar, linux-kernel

On Thu, Jun 14, 2001 at 07:47:57PM +0200, Andrea Arcangeli wrote:
> On Thu, Jun 14, 2001 at 10:32:49AM -0700, Richard Henderson wrote:
> > within glibc, and (2) making these accesses slower since they
> > will be considered O_DIRECT after the change.
> 
> and then read/write will return -EINVAL which is life-threatening.
> O_DIRECT like rawio via /dev/raw imposes special buffer size and
> alignment (size multiple of softblocksize of the fs and softblocksize
> alignment, at max I can turn it down to hardblocksize without intensive
> changes and guaranteeing zerocopy [modulo bounce buffers on x86 of
> course]).
> 
> So in short at least glibc would need to be replaced...

While we sort out this issue, I have released a new o_direct patch and
a 2.4.6pre3aa1 with O_DIRECT set as in the patches I sent to Linus at
the start of the thread. As soon as we reach a definitive decision I will
update them. (In the meantime, alpha users will at least be able again
to use O_SYNC with o_direct support applied ;)

Andrea

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: unregistered changes to the user<->kernel API
  2001-06-14 17:47       ` Andrea Arcangeli
  2001-06-14 18:16         ` Andrea Arcangeli
@ 2001-06-14 18:17         ` Richard Henderson
  1 sibling, 0 replies; 15+ messages in thread
From: Richard Henderson @ 2001-06-14 18:17 UTC (permalink / raw)
  To: Andrea Arcangeli; +Cc: Linus Torvalds, Ingo Molnar, linux-kernel

On Thu, Jun 14, 2001 at 07:47:57PM +0200, Andrea Arcangeli wrote:
> On Thu, Jun 14, 2001 at 10:32:49AM -0700, Richard Henderson wrote:
> > within glibc, and (2) making these accesses slower since they
> > will be considered O_DIRECT after the change.
> 
> and then read/write will return -EINVAL which is life-threatening.

It would?  I thought it would be ignored at minimum. 

Damnit, I guess we'll have to move it after all.  How
completely irritating.


r~

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: unregistered changes to the user<->kernel API
  2001-06-14 18:11         ` Alan Cox
@ 2001-06-14 18:27           ` Andrea Arcangeli
  2001-06-14 21:43           ` Albert D. Cahalan
  1 sibling, 0 replies; 15+ messages in thread
From: Andrea Arcangeli @ 2001-06-14 18:27 UTC (permalink / raw)
  To: Alan Cox
  Cc: Jeff Garzik, Linus Torvalds, Ingo Molnar, linux-kernel,
	Richard Henderson

On Thu, Jun 14, 2001 at 07:11:27PM +0100, Alan Cox wrote:
> I dont see why Tux should be merged. If we have people achieving the same
> performance in user space with the core facilities tux added to the kernel

I never had any doubt that you could do the same in userspace using the
zerocopy functionality (see the old threads with Jeff V. Merkey and
netware, where I was saying you don't need it in the kernel), so in theory I
have totally agreed from the start...

> like the better irq/sendfile stuff why bother merging tux ?

... but in practice x15 is neither open source nor free software, and I'd
prefer to have an open choice. I wouldn't even think of merging tux if
zope, apache, or thttpd ran as fast as tux, of course.

Andrea

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: unregistered changes to the user<->kernel API
  2001-06-14 18:11         ` Alan Cox
  2001-06-14 18:27           ` Andrea Arcangeli
@ 2001-06-14 21:43           ` Albert D. Cahalan
  1 sibling, 0 replies; 15+ messages in thread
From: Albert D. Cahalan @ 2001-06-14 21:43 UTC (permalink / raw)
  To: Alan Cox
  Cc: Andrea Arcangeli, Jeff Garzik, Linus Torvalds, Ingo Molnar,
	linux-kernel, Richard Henderson

Alan Cox writes:

> I dont see why Tux should be merged. If we have people achieving the same
> performance in user space with the core facilities tux added to the kernel
> like the better irq/sendfile stuff why bother merging tux ?

1. We have khttpd, which should be replaced by something faster.
2. Tux makes a nice example.
3. Tux can be the fastest. If it isn't, it needs more work.

Toward the end of the X15 discussion, Ingo Molnar mentioned
something he'd not implemented yet. I don't recall exactly,
but for sure Tux hasn't run out of optimizations to do.

Also the kernel-CGI feature has not been used in benchmarks.
Tux has been running user code.

IMHO the Tux server could be renamed "khttpd" and dropped in
with whatever is needed to be compatible for existing setups.

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2001-06-14 21:45 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2001-06-14 17:12 unregistered changes to the user<->kernel API Andrea Arcangeli
2001-06-14 17:16 ` Andrea Arcangeli
2001-06-14 17:21   ` Andrea Arcangeli
2001-06-14 17:32     ` Richard Henderson
2001-06-14 17:47       ` Andrea Arcangeli
2001-06-14 18:16         ` Andrea Arcangeli
2001-06-14 18:17         ` Richard Henderson
2001-06-14 18:10       ` Alexander Viro
2001-06-14 17:25 ` Jeff Garzik
2001-06-14 17:44   ` Andrea Arcangeli
2001-06-14 17:52     ` Jeff Garzik
2001-06-14 18:03       ` Andrea Arcangeli
2001-06-14 18:11         ` Alan Cox
2001-06-14 18:27           ` Andrea Arcangeli
2001-06-14 21:43           ` Albert D. Cahalan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).