linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] in-core AFS multiplexor and PAG support
@ 2003-05-13 15:34 David Howells
  0 siblings, 0 replies; 21+ messages in thread
From: David Howells @ 2003-05-13 15:34 UTC (permalink / raw)
  To: torvalds; +Cc: linux-kernel, linux-fsdevel, openafs-devel


Hi Linus,

Here's a patch to add three things that are required for AFS and that may be
of use to other stuff (such as NFSv4 and Samba):

 (1) PAG (Process Authentication Group) support. A PAG is ID'd by a unique
     number, and is represented in memory as a structure that has a ring of
     associated authentication tokens.

     Each process can either be part of a PAG, or it can PAG-less - in which
     case it has no authentication tokens.

     Two new syscalls are added: setpag and getpag.

 (2) Authentication token support. An authentication token is a blob of binary
     data that is keyed by filesystem name and fs-specific key (such as AFS
     cell name or SMB workgroup).

     These are retained in two places: each PAG has a ring of tokens
     appropriate to the group of processes within that PAG; and each struct
     file has a pointer to the single token governing that file (if there is
     one).

 (3) AFS multiplexor support. Not complete at the moment, but implemented far
     enough to provide access to the PAG mechanism. Further patches will be
     forthcoming to make this fully functional.

It is my intention to add Trond's vfs_cred stuff in at some point, but that
has a lot greater impact than this patch (which has negligible impact).

David

diff -uNr linux-2.5.69/arch/i386/kernel/entry.S linux-2.5.69-cred/arch/i386/kernel/entry.S
--- linux-2.5.69/arch/i386/kernel/entry.S	2003-05-06 15:06:47.000000000 +0100
+++ linux-2.5.69-cred/arch/i386/kernel/entry.S	2003-05-13 11:16:17.000000000 +0100
@@ -721,7 +721,7 @@
 	.long sys_bdflush
 	.long sys_sysfs		/* 135 */
 	.long sys_personality
-	.long sys_ni_syscall	/* reserved for afs_syscall */
+	.long sys_afs
 	.long sys_setfsuid16
 	.long sys_setfsgid16
 	.long sys_llseek	/* 140 */
@@ -852,6 +852,7 @@
  	.long sys_clock_gettime		/* 265 */
  	.long sys_clock_getres
  	.long sys_clock_nanosleep
- 
+	.long sys_setpag
+	.long sys_getpag
  
 nr_syscalls=(.-sys_call_table)/4
diff -uNr linux-2.5.69/fs/afssyscall.c linux-2.5.69-cred/fs/afssyscall.c
--- linux-2.5.69/fs/afssyscall.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.5.69-cred/fs/afssyscall.c	2003-05-13 16:02:47.000000000 +0100
@@ -0,0 +1,223 @@
+/* afssyscall.c: AFS system call multiplexor
+ *
+ * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/afs_syscall.h>
+#include <linux/linkage.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/cred.h>
+#include <asm/uaccess.h>
+
+static const struct {
+	u_int8_t	flags;
+#define VIOC_PATH_YES		0x01	/* must be a path */
+#define VIOC_PATH_NO		0x02	/* must not be a path */
+#define VIOC_FOLLOW_OPT		0x00	/* followsymlinks can be anything */
+#define VIOC_FOLLOW_NO		0x04	/* followsymlinks must be false */
+#define VIOC_IN_OPT		0x00	/* input data optional */
+#define VIOC_IN_YES		0x08	/* input data required */
+#define VIOC_IN_NO		0x10	/* input data prohibited */
+#define VIOC_OUT_OPT		0x00	/* output buffer optional */  
+#define VIOC_OUT_YES		0x20	/* output buffer required */  
+#define VIOC_OUT_NO		0x40	/* output buffer prohibited */
+#define VIOC_VALID		0x80	/* entry is valid */
+
+} afs_vioc_table[] = {
+
+#define _v(NAME,P,F,I,O) [NAME] = \
+{ VIOC_VALID | VIOC_PATH_##P | VIOC_FOLLOW_##F | VIOC_IN_##I | VIOC_OUT_##O }
+
+	/* command name                  path folw  in  out */
+	_v(__VIOCSETAL			, YES, OPT, YES, NO ),
+	_v(__VIOCGETAL			, YES, OPT, NO , YES),
+	_v(__VIOCSETTOK			, NO , NO , YES, NO ),
+	_v(__VIOCGETVOLSTAT		, YES, OPT, NO , YES),
+	_v(__VIOCSETVOLSTAT		, YES, OPT, YES, NO ),
+	_v(__VIOCFLUSH			, YES, OPT, NO , NO ),
+	_v(__VIOCGETTOK			, NO , NO , YES, YES),
+	_v(__VIOCUNLOG			, NO , NO , NO , NO ),
+	_v(__VIOCCKSERV			, NO , NO , OPT, OPT),
+	_v(__VIOCCKBACK			, NO , NO , NO , NO ),
+	_v(__VIOCCKCONN			, NO , NO , NO , YES),
+	_v(__VIOCWHEREIS		, YES, OPT, NO , YES),
+	_v(__VIOCACCESS			, YES, OPT, YES, NO ),
+	_v(__VIOCUNPAG			, NO , NO , NO , NO ),
+	_v(__VIOCGETFID			, YES, OPT, NO , YES),
+	_v(__VIOCSETCACHESIZE		, NO , NO , YES, NO ),
+	_v(__VIOCFLUSHCB		, YES, OPT, NO , NO ),
+	_v(__VIOCNEWCELL		, NO , NO , YES, NO ),
+	_v(__VIOCGETCELL		, NO , NO , YES, YES),
+	_v(__VIOC_DELETE_MT_PT		, YES, NO , YES, NO ),
+	_v(__VIOC_STAT_MT_PT		, YES, OPT, YES, YES), /* !! can't use xattr */
+	_v(__VIOC_FILE_CELL_NAME	, YES, OPT, NO , YES),
+	_v(__VIOC_GET_WS_CELL		, NO , NO , NO , YES),
+	_v(__VIOC_AFS_MARINER_HOST	, NO , NO , YES, OPT),
+	_v(__VIOC_GET_PRIMARY_CELL	, NO , NO , NO , YES),
+	_v(__VIOC_VENUSLOG		, NO , NO , YES, OPT),
+	_v(__VIOC_GETCELLSTATUS		, NO , NO , YES, YES),
+	_v(__VIOC_SETCELLSTATUS		, NO , NO , YES, NO ),
+	_v(__VIOC_FLUSHVOLUME		, YES, OPT, NO , NO ),
+	_v(__VIOC_AFS_SYSNAME		, NO , NO , YES, OPT),
+	_v(__VIOC_EXPORTAFS		, NO , NO , YES, OPT),
+	_v(__VIOCGETCACHEPARMS		, NO , NO , NO , YES),
+
+};
+
+static int vfs_pioctl(const char __user *path, long cmd, void __user *arg, int followsymlinks);
+static int vfs_pioctl_inode(struct nameidata *nd, long cmd, struct afs_ioctl *ioc);
+static int vfs_pioctl_fs(long cmd, struct afs_ioctl *ioc);
+
+/*****************************************************************************/
+/*
+ * AFS syscall multiplexor
+ */
+int sys_afs(long subsyscall, long p1, long p2, long p3, long p4, long p5)
+{
+	switch (subsyscall) {
+	case AFS_SYSCALL_PIOCTL:	return vfs_pioctl((const char*)p1,p2,(void*)p3,p4);
+	case AFS_SYSCALL_SETPAG:	return sys_setpag(-1);
+	case AFS_SYSCALL_CALL:		return -ENOSYS;
+	case AFS_SYSCALL_ICL:		return -ENOSYS;
+	default:			return -ENOSYS;
+	}
+
+} /* end sys_afs() */
+
+/*****************************************************************************/
+/*
+ * handle path-based ioctl
+ */
+static int vfs_pioctl(const char __user *path, long cmd, void __user *arg, int followsymlinks)
+{
+	struct afs_ioctl uioc, ioc;
+	struct nameidata nd;
+	void *data;
+	char flags;
+	int err;
+
+	if (_IOC_NR(cmd) < 0 || _IOC_NR(cmd) >= sizeof(afs_vioc_table)/sizeof(afs_vioc_table[0]))
+		return -EINVAL;
+	flags = afs_vioc_table[_IOC_NR(cmd)].flags;
+
+	if (!(flags & VIOC_VALID))
+		return -EINVAL;
+
+	if ((flags & VIOC_PATH_NO	&& path			) ||
+	    (flags & VIOC_PATH_YES	&& !path		) ||
+	    (flags & VIOC_FOLLOW_NO	&& followsymlinks	))
+		return -EINVAL;
+
+	if (copy_from_user(&uioc,arg,sizeof(uioc)) != 0)
+		return -EFAULT;
+
+	if ((flags & VIOC_IN_YES	&& !uioc.in	) ||
+	    (flags & VIOC_IN_NO		&& uioc.in	) ||
+	    (flags & VIOC_OUT_YES	&& !uioc.out	) ||
+	    (flags & VIOC_OUT_NO	&& uioc.out	))
+		return -EINVAL;
+
+	if (uioc.in_size<0)
+		ioc.in_size = 0;
+	else if (uioc.in_size > PAGE_SIZE/2)
+		return -E2BIG;
+	else
+		ioc.in_size = uioc.in_size;
+
+	if (uioc.out_size<0)
+		ioc.out_size = 0;
+	else if (uioc.out_size > PAGE_SIZE/2)
+		return -E2BIG;
+	else
+		ioc.out_size = uioc.out_size;
+
+	if ((flags & VIOC_IN_YES	&& ioc.in_size<=0	) ||
+	    (flags & VIOC_IN_NO		&& ioc.in_size>0	) ||
+	    (flags & VIOC_OUT_YES	&& ioc.out_size<=0	) ||
+	    (flags & VIOC_OUT_NO	&& ioc.out_size>0	))
+		return -EINVAL;
+
+	data = (void*) get_zeroed_page(GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	ioc.in	= ioc.in_size>0  ? data : NULL;
+	ioc.out	= ioc.out_size>0 ? data + PAGE_SIZE/2 : NULL;
+
+	if (ioc.in && copy_from_user(ioc.in,uioc.in,ioc.in_size) != 0) {
+		free_page((unsigned long)data);
+		return -EFAULT;
+	}
+
+	/* perform the appropriate action */
+	if (!path) {
+		err = vfs_pioctl_fs(cmd,&ioc);
+	}
+	else {
+		unsigned flags = followsymlinks ? LOOKUP_FOLLOW : 0;
+
+		err = __user_walk(path,flags,&nd);
+		if (!err) {
+			err = vfs_pioctl_inode(&nd,cmd,&ioc);
+			path_release(&nd);
+		}
+	}
+
+	/* write back the result, zero padding to the size of the buffer */
+	if (err==0 || uioc.out_size) {
+		if (copy_to_user(uioc.out,ioc.out,uioc.out_size) != 0)
+			err = -EFAULT;
+	}
+
+	free_page((unsigned long)data);
+	return err;
+
+} /* end vfs_pioctl() */
+
+/*****************************************************************************/
+/*
+ * pioctls that require an inode
+ */
+static int vfs_pioctl_inode(struct nameidata *nd, long cmd, struct afs_ioctl *ioc)
+{
+#if 0
+	printk("vfs_pioctl_inode(%s,%ld,{is=%hd,os=%hd})\n",
+	       nd->dentry->d_name.name,_IOC_NR(cmd),ioc->in_size,ioc->out_size);
+#endif
+
+	return -EINVAL; /* not yet complete */
+
+} /* end vfs_pioctl_inode() */
+
+/*****************************************************************************/
+/*
+ * pioctls that don't require an inode, but rather work on the FS as a whole
+ */
+static int vfs_pioctl_fs(long cmd, struct afs_ioctl *ioc)
+{
+#if 0
+	printk("vfs_pioctl_fs(%ld,{is=%hd,os=%hd})\n",
+	       _IOC_NR(cmd),ioc->in_size,ioc->out_size);
+#endif
+
+	switch (cmd) {
+	case VIOCUNPAG:
+	case VIOCUNLOG:
+		ioc->out_size = 0;
+		vfs_unpag("afs");
+		return 0;
+	default:
+		return -EINVAL; /* not yet complete */
+	}
+
+} /* end vfs_pioctl_fs() */
diff -uNr linux-2.5.69/fs/file_table.c linux-2.5.69-cred/fs/file_table.c
--- linux-2.5.69/fs/file_table.c	2003-05-06 15:04:45.000000000 +0100
+++ linux-2.5.69-cred/fs/file_table.c	2003-05-13 13:54:58.000000000 +0100
@@ -166,6 +166,7 @@
 	if (file->f_op && file->f_op->release)
 		file->f_op->release(inode, file);
 	security_file_free(file);
+	if (file->f_token) vfs_token_put(file->f_token);
 	fops_put(file->f_op);
 	if (file->f_mode & FMODE_WRITE)
 		put_write_access(inode);
diff -uNr linux-2.5.69/fs/Makefile linux-2.5.69-cred/fs/Makefile
--- linux-2.5.69/fs/Makefile	2003-05-06 15:04:46.000000000 +0100
+++ linux-2.5.69-cred/fs/Makefile	2003-05-13 11:18:20.000000000 +0100
@@ -10,7 +10,8 @@
 		namei.o fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \
 		dcache.o inode.o attr.o bad_inode.o file.o dnotify.o \
 		filesystems.o namespace.o seq_file.o xattr.o libfs.o \
-		fs-writeback.o mpage.o direct-io.o aio.o eventpoll.o
+		fs-writeback.o mpage.o direct-io.o aio.o eventpoll.o \
+		afssyscall.o
 
 obj-$(CONFIG_COMPAT) += compat.o
 
diff -uNr linux-2.5.69/fs/open.c linux-2.5.69-cred/fs/open.c
--- linux-2.5.69/fs/open.c	2003-05-06 15:04:45.000000000 +0100
+++ linux-2.5.69-cred/fs/open.c	2003-05-13 11:28:08.000000000 +0100
@@ -46,7 +46,7 @@
 	struct nameidata nd;
 	int error;
 
-	error = user_path_walk(path, &nd);
+	error = user_path_walk(path,&nd);
 	if (!error) {
 		struct statfs tmp;
 		error = vfs_statfs(nd.dentry->d_inode->i_sb, &tmp);
diff -uNr linux-2.5.69/fs/proc/array.c linux-2.5.69-cred/fs/proc/array.c
--- linux-2.5.69/fs/proc/array.c	2003-05-06 15:07:08.000000000 +0100
+++ linux-2.5.69-cred/fs/proc/array.c	2003-05-13 10:58:56.000000000 +0100
@@ -154,13 +154,14 @@
 	read_lock(&tasklist_lock);
 	buffer += sprintf(buffer,
 		"State:\t%s\n"
+		"Pag:\t%d\n"
 		"Tgid:\t%d\n"
 		"Pid:\t%d\n"
 		"PPid:\t%d\n"
 		"TracerPid:\t%d\n"
 		"Uid:\t%d\t%d\t%d\t%d\n"
 		"Gid:\t%d\t%d\t%d\t%d\n",
-		get_task_state(p), p->tgid,
+		get_task_state(p), p->vfspag ? p->vfspag->pag : 0, p->tgid,
 		p->pid, p->pid ? p->real_parent->pid : 0,
 		p->pid && p->ptrace ? p->parent->pid : 0,
 		p->uid, p->euid, p->suid, p->fsuid,
diff -uNr linux-2.5.69/include/asm-i386/posix_types.h linux-2.5.69-cred/include/asm-i386/posix_types.h
--- linux-2.5.69/include/asm-i386/posix_types.h	2003-05-06 15:04:37.000000000 +0100
+++ linux-2.5.69-cred/include/asm-i386/posix_types.h	2003-05-12 10:19:15.000000000 +0100
@@ -13,6 +13,7 @@
 typedef unsigned short	__kernel_nlink_t;
 typedef long		__kernel_off_t;
 typedef int		__kernel_pid_t;
+typedef int		__kernel_pag_t;
 typedef unsigned short	__kernel_ipc_pid_t;
 typedef unsigned short	__kernel_uid_t;
 typedef unsigned short	__kernel_gid_t;
diff -uNr linux-2.5.69/include/asm-i386/unistd.h linux-2.5.69-cred/include/asm-i386/unistd.h
--- linux-2.5.69/include/asm-i386/unistd.h	2003-05-06 15:04:37.000000000 +0100
+++ linux-2.5.69-cred/include/asm-i386/unistd.h	2003-05-13 10:47:59.000000000 +0100
@@ -273,8 +273,10 @@
 #define __NR_clock_gettime	(__NR_timer_create+6)
 #define __NR_clock_getres	(__NR_timer_create+7)
 #define __NR_clock_nanosleep	(__NR_timer_create+8)
+#define __NR_setpag		268
+#define __NR_getpag		269
 
-#define NR_syscalls 268
+#define NR_syscalls 270
 
 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
 
diff -uNr linux-2.5.69/include/linux/afs_syscall.h linux-2.5.69-cred/include/linux/afs_syscall.h
--- linux-2.5.69/include/linux/afs_syscall.h	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.5.69-cred/include/linux/afs_syscall.h	2003-05-13 13:30:15.000000000 +0100
@@ -0,0 +1,159 @@
+/* afs_syscall.h: AFS system call constants and structures
+ *
+ * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#ifndef _LINUX_AFS_SYSCALL_H
+#define _LINUX_AFS_SYSCALL_H
+
+/* level one multiplexor */
+#define AFS_SYSCALL_PIOCTL	20
+#define AFS_SYSCALL_SETPAG	21
+#define AFS_SYSCALL_CALL	28
+#define AFS_SYSCALL_ICL		30
+
+/* level two multiplexor for pioctl call */
+#define __VIOCSETAL			1	/* set the ACL on a directory */
+#define __VIOCGETAL			2	/* get the ACL on a directory */
+#define __VIOCSETTOK			3	/* set the caller's token for a cell */
+#define __VIOCGETVOLSTAT		4	/* get volume status */
+#define __VIOCSETVOLSTAT		5	/* set volume status */
+#define __VIOCFLUSH			6	/* flush an object from the cache */
+#define __VIOCGETTOK			8	/* get the caller's token for a cell */
+#define __VIOCUNLOG			9	/* discard authentication information */
+#define __VIOCCKSERV			10	/* check status of 1+ file servers */
+#define __VIOCCKBACK			11	/* mark cached volume info as stale */
+#define __VIOCCKCONN			12	/* check caller's tokens/connections */
+#define __VIOCWHEREIS			13	/* find host for a volume */
+#define __VIOCACCESS			20	/* check caller's access on an object */
+#define __VIOCUNPAG			21	/* discard authentication information */
+#define __VIOCGETFID			22	/* get FID for an object */
+#define __VIOCSETCACHESIZE		24	/* set max cache size*/
+#define __VIOCFLUSHCB			25	/* unilaterally drop a callback */
+#define __VIOCNEWCELL			26	/* set cell service information */
+#define __VIOCGETCELL			27	/* get cell configuration entry */
+#define __VIOC_DELETE_MT_PT		28	/* delete a mount point */
+#define __VIOC_STAT_MT_PT		29	/* get contents of a mount point */
+#define __VIOC_FILE_CELL_NAME		30	/* get cell hosting a given object */
+#define __VIOC_GET_WS_CELL		31	/* get caller's home cell name */
+#define __VIOC_AFS_MARINER_HOST		32	/* get/set file transfer monitoring o/p */
+#define __VIOC_GET_PRIMARY_CELL		33	/* get caller's primary cell */
+#define __VIOC_VENUSLOG			34	/* enable/disable CM logging */
+#define __VIOC_GETCELLSTATUS		35	/* get status info for a cell entry */
+#define __VIOC_SETCELLSTATUS		36	/* set status info for a cell entry */
+#define __VIOC_FLUSHVOLUME		37	/* flush cached data from a volume */
+#define __VIOC_AFS_SYSNAME		38	/* get/set the @sys mapping */
+#define __VIOC_EXPORTAFS		39	/* enable/disable NFS/AFS translation */
+#define __VIOCGETCACHEPARMS		40	/* get current cache parameter values */
+
+#define __VIOC(N)	_IOW('V',N,struct afs_ioctl)
+
+#define VIOCSETAL			__VIOC(__VIOCSETAL)
+#define VIOCGETAL			__VIOC(__VIOCGETAL)
+#define VIOCSETTOK			__VIOC(__VIOCSETTOK)
+#define VIOCGETVOLSTAT			__VIOC(__VIOCGETVOLSTAT)
+#define VIOCSETVOLSTAT			__VIOC(__VIOCSETVOLSTAT)
+#define VIOCFLUSH			__VIOC(__VIOCFLUSH)
+#define VIOCGETTOK			__VIOC(__VIOCGETTOK)
+#define VIOCUNLOG			__VIOC(__VIOCUNLOG)
+#define VIOCCKSERV			__VIOC(__VIOCCKSERV)
+#define VIOCCKBACK			__VIOC(__VIOCCKBACK)
+#define VIOCCKCONN			__VIOC(__VIOCCKCONN)
+#define VIOCWHEREIS			__VIOC(__VIOCWHEREIS)
+#define VIOCACCESS			__VIOC(__VIOCACCESS)
+#define VIOCUNPAG			__VIOC(__VIOCUNPAG)
+#define VIOCGETFID			__VIOC(__VIOCGETFID)
+#define VIOCSETCACHESIZE		__VIOC(__VIOCSETCACHESIZE)
+#define VIOCFLUSHCB			__VIOC(__VIOCFLUSHCB)
+#define VIOCNEWCELL			__VIOC(__VIOCNEWCELL)
+#define VIOCGETCELL			__VIOC(__VIOCGETCELL)
+#define VIOC_DELETE_MT_PT		__VIOC(__VIOC_DELETE_MT_PT)
+#define VIOC_STAT_MT_PT			__VIOC(__VIOC_STAT_MT_PT)
+#define VIOC_FILE_CELL_NAME		__VIOC(__VIOC_FILE_CELL_NAME)
+#define VIOC_GET_WS_CELL		__VIOC(__VIOC_GET_WS_CELL)
+#define VIOC_AFS_MARINER_HOST		__VIOC(__VIOC_AFS_MARINER_HOST)
+#define VIOC_GET_PRIMARY_CELL		__VIOC(__VIOC_GET_PRIMARY_CELL)
+#define VIOC_VENUSLOG			__VIOC(__VIOC_VENUSLOG)
+#define VIOC_GETCELLSTATUS		__VIOC(__VIOC_GETCELLSTATUS)
+#define VIOC_SETCELLSTATUS		__VIOC(__VIOC_SETCELLSTATUS)
+#define VIOC_FLUSHVOLUME		__VIOC(__VIOC_FLUSHVOLUME)
+#define VIOC_AFS_SYSNAME		__VIOC(__VIOC_AFS_SYSNAME)
+#define VIOC_EXPORTAFS			__VIOC(__VIOC_EXPORTAFS)
+#define VIOCGETCACHEPARMS		__VIOC(__VIOCGETCACHEPARMS)
+
+/* level two multiplexor for AFS control operations */
+#define	AFSOP_START_RXCALLBACK		0
+#define	AFSOP_START_AFS			1
+#define	AFSOP_START_BKG			2
+#define	AFSOP_START_TRUNCDAEMON		3
+#define AFSOP_START_CS			4
+#define	AFSOP_ADDCELL			5
+#define	AFSOP_CACHEINIT			6
+#define	AFSOP_CACHEINFO			7
+#define	AFSOP_VOLUMEINFO		8
+#define	AFSOP_CACHEFILE			9
+#define	AFSOP_CACHEINODE		10
+#define	AFSOP_AFSLOG			11
+#define	AFSOP_ROOTVOLUME		12
+#define	AFSOP_STARTLOG			14
+#define	AFSOP_ENDLOG			15
+#define AFSOP_AFS_VFSMOUNT		16
+#define AFSOP_ADVISEADDR		17
+#define AFSOP_CLOSEWAIT			18
+#define	AFSOP_RXEVENT_DAEMON		19
+#define	AFSOP_GETMTU			20
+#define	AFSOP_GETIFADDRS		21
+#define	AFSOP_ADDCELL2			29
+#define	AFSOP_AFSDB_HANDLER		30
+#define	AFSOP_SET_DYNROOT		31
+#define	AFSOP_ADDCELLALIAS		32
+#define	AFSOP_SET_FAKESTAT		33
+#define	AFSOP_CELLINFO			34
+#define	AFSOP_SET_THISCELL		35
+#define AFSOP_BASIC_INIT		36
+
+/* level two multiplexor for In Core Logging (ICL) call */
+#define AFS_ICL_OP_COPYOUT		1
+#define	AFS_ICL_OP_ENUMLOGS		2
+#define	AFS_ICL_OP_CLRLOG		3
+#define	AFS_ICL_OP_CLRSET		4
+#define AFS_ICL_OP_CLRALL		5
+#define AFS_ICL_OP_ENUMSETS		6
+#define	AFS_ICL_OP_SETSTAT		7
+#define	AFS_ICL_OP_SETSTATALL		8
+#define AFS_ICL_OP_SETLOGSIZE		9
+#define	AFS_ICL_OP_ENUMLOGSBYSET	10
+#define AFS_ICL_OP_GETSETINFO		11
+#define AFS_ICL_OP_GETLOGINFO		12
+#define AFS_ICL_OP_COPYOUTCLR		13
+#define AFS_ICL_OP_VERSION		14
+
+/*
+ * all AFS pioctls take the same sort of argument
+ */
+struct afs_ioctl
+{
+	char		*in;		/* input buffer */
+	char		*out;		/* output buffer */
+	int16_t		in_size;	/* size of input buffer */
+	int16_t		out_size;	/* maximum size of output buffer */
+};
+
+#define AFS_IOCTL_IN_SIZE_MAX	2048
+#define AFS_IOCTL_OUT_SIZE_MAX	2048
+
+/*
+ * but this is used if the kernel is 64-bit, which is really strange
+ */
+struct afs_ioctl32
+{
+	u_int32_t	in;		/* input buffer address (cast to pointer) */
+	u_int32_t	out;		/* output buffer address (cast to pointer) */
+	int16_t		in_size;	/* size of input buffer */
+	int16_t		out_size;	/* maximum size of output buffer */
+};
+
+
+
+#endif /* _LINUX_AFS_SYSCALL_H */
diff -uNr linux-2.5.69/include/linux/cred.h linux-2.5.69-cred/include/linux/cred.h
--- linux-2.5.69/include/linux/cred.h	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.5.69-cred/include/linux/cred.h	2003-05-13 13:20:06.000000000 +0100
@@ -0,0 +1,79 @@
+#ifndef _LINUX_CRED_H
+#define _LINUX_CRED_H
+
+#ifdef __KERNEL__
+
+#include <linux/param.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <asm/atomic.h>
+
+/*
+ * VFS session authentication token cache
+ *
+ * This is used to store the data required for extra levels of filesystem
+ * security (such as AFS/NFS kerberos keys, Samba workgroup/user/pass, or NTFS
+ * ACLs).
+ *
+ * VFS authentication tokens contain a single blob of data, consisting of three
+ * parts, all next to each other:
+ *   (1) An FS name
+ *   (2) A key
+ *   (3) An arbitrary chunk of data
+ *
+ * Token blobs must not be changed once passed to the core kernel for management
+ */
+struct vfs_pag
+{
+	struct rb_node		node;
+	atomic_t		usage;
+	pag_t			pag;	/* Process Authentication Group ID */
+	struct list_head	tokens;	/* authentication tokens */
+	rwlock_t		lock;
+};
+
+struct vfs_token
+{
+	atomic_t		usage;
+	struct list_head	link;	/* link in pag's list */
+	unsigned short		k_off;	/* offset of key in blob */
+	unsigned short		d_off;	/* offset of data in blob */
+	size_t			size;	/* size of blob */
+	void			*blob;	/* blob containing key + data */
+};
+
+extern int sys_setpag(pag_t);
+extern int sys_getpag(void);
+extern void vfs_unpag(const char *fsname);
+
+extern void vfs_pag_put(struct vfs_pag *);
+
+static inline struct vfs_pag *vfs_pag_get(struct vfs_pag *vfspag)
+{
+	atomic_inc(&vfspag->usage);
+	return vfspag;
+}
+
+static inline int is_vfs_token_valid(struct vfs_token *vtoken)
+{
+	return !list_empty(&vtoken->link);
+}
+
+extern int vfs_pag_add_token(const char *fsname,
+			     unsigned short klen,
+			     const void *key,
+			     size_t dlen,
+			     const void *data,
+			     struct vfs_token **_token);
+
+extern struct vfs_token *vfs_pag_find_token(const char *fsname,
+					    unsigned short klen,
+					    const void *key);
+
+extern void vfs_pag_withdraw_token(struct vfs_token *vtoken);
+
+extern void vfs_token_put(struct vfs_token *vtoken);
+
+#endif /* __KERNEL__ */
+#endif /* _LINUX_CRED_H */
diff -uNr linux-2.5.69/include/linux/fs.h linux-2.5.69-cred/include/linux/fs.h
--- linux-2.5.69/include/linux/fs.h	2003-05-13 11:02:22.000000000 +0100
+++ linux-2.5.69-cred/include/linux/fs.h	2003-05-13 11:02:35.000000000 +0100
@@ -430,6 +430,7 @@
 	mode_t			f_mode;
 	loff_t			f_pos;
 	struct fown_struct	f_owner;
+	struct vfs_token	*f_token;	/* governing credential */
 	unsigned int		f_uid, f_gid;
 	int			f_error;
 	struct file_ra_state	f_ra;
diff -uNr linux-2.5.69/include/linux/sched.h linux-2.5.69-cred/include/linux/sched.h
--- linux-2.5.69/include/linux/sched.h	2003-05-06 15:07:12.000000000 +0100
+++ linux-2.5.69-cred/include/linux/sched.h	2003-05-13 10:29:18.000000000 +0100
@@ -28,6 +28,7 @@
 #include <linux/completion.h>
 #include <linux/pid.h>
 #include <linux/percpu.h>
+#include <linux/cred.h>
 
 struct exec_domain;
 
@@ -387,6 +388,7 @@
 	gid_t gid,egid,sgid,fsgid;
 	int ngroups;
 	gid_t	groups[NGROUPS];
+	struct vfs_pag *vfspag;
 	kernel_cap_t   cap_effective, cap_inheritable, cap_permitted;
 	int keep_capabilities:1;
 	struct user_struct *user;
diff -uNr linux-2.5.69/include/linux/types.h linux-2.5.69-cred/include/linux/types.h
--- linux-2.5.69/include/linux/types.h	2003-05-06 15:04:31.000000000 +0100
+++ linux-2.5.69-cred/include/linux/types.h	2003-05-12 10:19:08.000000000 +0100
@@ -24,6 +24,7 @@
 typedef __kernel_nlink_t	nlink_t;
 typedef __kernel_off_t		off_t;
 typedef __kernel_pid_t		pid_t;
+typedef __kernel_pag_t		pag_t;
 typedef __kernel_daddr_t	daddr_t;
 typedef __kernel_key_t		key_t;
 typedef __kernel_suseconds_t	suseconds_t;
diff -uNr linux-2.5.69/init/main.c linux-2.5.69-cred/init/main.c
--- linux-2.5.69/init/main.c	2003-05-06 15:07:12.000000000 +0100
+++ linux-2.5.69-cred/init/main.c	2003-05-13 14:08:11.000000000 +0100
@@ -80,6 +80,7 @@
 extern void pidhash_init(void);
 extern void pidmap_init(void);
 extern void pte_chain_init(void);
+extern void credentials_init(void);
 extern void radix_tree_init(void);
 extern void free_initmem(void);
 extern void populate_rootfs(void);
@@ -434,6 +435,7 @@
 	pidmap_init();
 	pgtable_cache_init();
 	pte_chain_init();
+	credentials_init();
 	fork_init(num_physpages);
 	proc_caches_init();
 	security_scaffolding_startup();
diff -uNr linux-2.5.69/kernel/cred.c linux-2.5.69-cred/kernel/cred.c
--- linux-2.5.69/kernel/cred.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.5.69-cred/kernel/cred.c	2003-05-13 13:21:06.000000000 +0100
@@ -0,0 +1,367 @@
+/* cred.c: authentication credentials management
+ *
+ * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/cred.h>
+
+static kmem_cache_t *vfs_token_cache;
+static kmem_cache_t *vfs_pag_cache;
+
+static struct rb_root	vfs_pag_tree;
+static spinlock_t	vfs_pag_lock = SPIN_LOCK_UNLOCKED;
+static pag_t		vfs_pag_next = 1;
+
+static void vfs_pag_init_once(void *_vfspag, kmem_cache_t *cachep, unsigned long flags)
+{
+	struct vfs_pag *vfspag = _vfspag;
+
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) {
+		memset(vfspag,0,sizeof(*vfspag));
+		INIT_LIST_HEAD(&vfspag->tokens);
+		rwlock_init(&vfspag->lock);
+	}
+}
+
+static void vfs_token_init_once(void *_vtoken, kmem_cache_t *cachep, unsigned long flags)
+{
+	struct vfs_token *vtoken = _vtoken;
+
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) {
+		memset(vtoken,0,sizeof(*vtoken));
+		INIT_LIST_HEAD(&vtoken->link);
+	}
+}
+
+void __init credentials_init(void)
+{
+	vfs_pag_cache = kmem_cache_create("vfs_pag",
+					      sizeof(struct vfs_pag),
+					      0,
+					      SLAB_HWCACHE_ALIGN,
+					      vfs_pag_init_once, NULL);
+	if (!vfs_pag_cache)
+		panic("Cannot create vfs pag SLAB cache");
+
+	vfs_token_cache = kmem_cache_create("vfs_token",
+					    sizeof(struct vfs_token),
+					    0,
+					    SLAB_HWCACHE_ALIGN,
+					    vfs_token_init_once, NULL);
+	if (!vfs_token_cache)
+		panic("Cannot create vfs token SLAB cache");
+}
+
+/*****************************************************************************/
+/*
+ * join an existing PAG (+ve), run without PAG (0), or create and join new PAG (-1)
+ * - returns ID of PAG joined or 0 if now running without a PAG
+ */
+int sys_setpag(pag_t pag)
+{
+	struct task_struct *tsk = current;
+	struct vfs_pag *vfspag, *xvfspag;
+	struct rb_node **p, *parent;
+
+	if (pag>0) {
+		/* join existing PAG */
+		if (tsk->vfspag->pag &&
+		    tsk->vfspag->pag==pag)
+			return pag;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		spin_lock(&vfs_pag_lock);
+
+		parent = NULL;
+		p = &vfs_pag_tree.rb_node;
+
+		while (*p) {
+			parent = *p;
+			vfspag = rb_entry(parent,struct vfs_pag,node);
+
+			if (pag < vfspag->pag)
+				p = &(*p)->rb_left;
+			else if (pag > vfspag->pag)
+				p = &(*p)->rb_right;
+			else
+				goto pag_found;
+		}
+
+		spin_unlock(&vfs_pag_lock);
+		return -ENOENT;
+
+	pag_found:
+		xvfspag = xchg(&current->vfspag,vfs_pag_get(vfspag));
+		spin_unlock(&vfs_pag_lock);
+
+		if (xvfspag) vfs_pag_put(xvfspag);
+		return pag;
+	}
+	else if (pag==0) {
+		/* run without a PAG */
+		xvfspag = xchg(&current->vfspag,NULL);
+
+		if (xvfspag) vfs_pag_put(xvfspag);
+		return 0;
+	}
+	else if (pag!=-1) {
+		     return -EINVAL;
+	}
+
+	/* start new PAG */
+	vfspag = kmem_cache_alloc(vfs_pag_cache,SLAB_KERNEL);
+	if (!vfspag)
+		return -ENOMEM;
+
+	atomic_set(&vfspag->usage,1);
+
+	/* PAG IDs must be +ve, >0 and unique */
+	spin_lock(&vfs_pag_lock);
+
+	vfspag->pag = vfs_pag_next++;
+	if (vfspag->pag<1)
+		vfspag->pag = 1;
+
+	parent = NULL;
+	p = &vfs_pag_tree.rb_node;
+
+	while (*p) {
+		parent = *p;
+		xvfspag = rb_entry(parent,struct vfs_pag,node);
+
+		if (vfspag->pag < xvfspag->pag)
+			p = &(*p)->rb_left;
+		else if (vfspag->pag > xvfspag->pag)
+			p = &(*p)->rb_right;
+		else
+			goto pag_exists;
+	}
+	goto insert_here;
+
+	/* we found a PAG of the same ID - walk the tree from that point
+	 * looking for the next unused PAG */
+ pag_exists:
+	for (;;) {
+		vfspag->pag = vfs_pag_next++;
+		if (vfspag->pag<1)
+			vfspag->pag = 1;
+
+		if (!parent->rb_parent)
+			p = &vfs_pag_tree.rb_node;
+		else if (parent->rb_parent->rb_left==parent)
+			p = &parent->rb_parent->rb_left;
+		else
+			p = &parent->rb_parent->rb_right;
+
+		parent = rb_next(parent);
+		if (!parent)
+			break;
+
+		xvfspag = rb_entry(parent,struct vfs_pag,node);
+		if (vfspag->pag < xvfspag->pag)
+			goto insert_here;
+	}
+
+ insert_here:
+	rb_link_node(&vfspag->node,parent,p);
+	rb_insert_color(&vfspag->node,&vfs_pag_tree);
+	spin_unlock(&vfs_pag_lock);
+
+	xvfspag = xchg(&current->vfspag,vfspag);
+	if (xvfspag) vfs_pag_put(xvfspag);
+
+	return vfspag->pag;
+} /* end sys_setpag() */
+
+/*****************************************************************************/
+/*
+ * get the PAG of the current process, or 0 if it doesn't have one
+ */
+int sys_getpag(void)
+{
+	struct vfs_pag *vfspag = current->vfspag;
+
+	return vfspag ? vfspag->pag : 0;
+
+} /* end sys_getpag() */
+
+/*****************************************************************************/
+/*
+ * dispose of a VFS pag
+ */
+void vfs_pag_put(struct vfs_pag *vfspag)
+{
+	struct vfs_token *vtoken;
+
+	if (atomic_dec_and_lock(&vfspag->usage,&vfs_pag_lock)) {
+		rb_erase(&vfspag->node,&vfs_pag_tree);
+		spin_unlock(&vfs_pag_lock);
+
+		while (!list_empty(&vfspag->tokens)) {
+			vtoken = list_entry(vfspag->tokens.next,struct vfs_token,link);
+			list_del_init(&vtoken->link);
+			vfs_token_put(vtoken);
+		}
+
+		kmem_cache_free(vfs_pag_cache,vfspag);
+	}
+
+} /* end vfs_pag_put() */
+
+/*****************************************************************************/
+/*
+ * dispose of a VFS token
+ */
+void vfs_token_put(struct vfs_token *vtoken)
+{
+	if (atomic_dec_and_test(&vtoken->usage)) {
+		kfree(vtoken->blob);
+		kmem_cache_free(vfs_pag_cache,vtoken);
+	}
+
+} /* end vfs_token_put() */
+
+/*****************************************************************************/
+/*
+ * add an authentication token to a pag list
+ */
+int vfs_pag_add_token(const char *fsname,
+		      unsigned short klen,
+		      const void *key,
+		      size_t dlen,
+		      const void *data,
+		      struct vfs_token **_vtoken)
+{
+	struct vfs_token *vtoken;
+	struct vfs_pag *vfspag = current->vfspag;
+
+	*_vtoken = NULL;
+
+	if (!vfspag)
+		return -EACCES;
+
+	vtoken = kmem_cache_alloc(vfs_token_cache,SLAB_KERNEL);
+	if (!vtoken)
+		return -ENOMEM;
+
+	vtoken->k_off	= strlen(fsname) + 1;
+	vtoken->d_off	= vtoken->k_off + klen;
+	vtoken->size	= vtoken->d_off + dlen;
+
+	vtoken->blob = kmalloc(vtoken->size,SLAB_KERNEL);
+	if (!vtoken->blob) {
+		kfree(vtoken);
+		return -ENOMEM;
+	}
+
+	atomic_set(&vtoken->usage,1);
+
+	memcpy(vtoken->blob,fsname,vtoken->k_off);
+	memcpy(vtoken->blob+vtoken->k_off,key,klen);
+	memcpy(vtoken->blob+vtoken->d_off,key,dlen);
+
+	write_lock(&vfspag->lock);
+	list_add_tail(&vtoken->link,&vfspag->tokens);
+	write_unlock(&vfspag->lock);
+
+	*_vtoken = vtoken;
+	return 0;
+} /* end vfs_pag_add_token() */
+
+EXPORT_SYMBOL(vfs_pag_add_token);
+
+/*****************************************************************************/
+/*
+ * search for a token covering a particular filesystem key in the specified pag list
+ */
+struct vfs_token *vfs_pag_find_token(const char *fsname,
+				     unsigned short klen,
+				     const void *key)
+{
+	struct vfs_token *vtoken;
+	struct vfs_pag *vfspag = current->vfspag;
+
+	if (!vfspag) return NULL;
+
+	read_lock(&vfspag->lock);
+
+	list_for_each_entry(vtoken,&vfspag->tokens,link) {
+		if (vtoken->d_off-vtoken->k_off == klen &&
+		    strcmp(vtoken->blob,fsname)==0 &&
+		    memcmp(vtoken->blob+vtoken->k_off,key,klen)==0)
+			goto found;		    
+	}
+
+	read_unlock(&vfspag->lock);
+	return NULL;
+
+ found:
+	read_unlock(&vfspag->lock);
+	return vtoken;
+} /* end vfs_pag_find_token() */
+
+EXPORT_SYMBOL(vfs_pag_find_token);
+
+/*****************************************************************************/
+/*
+ * withdraw a token from a pag list
+ */
+void vfs_pag_withdraw_token(struct vfs_token *vtoken)
+{
+	struct vfs_pag *vfspag = current->vfspag;
+
+	if (!vfspag)
+		return;
+
+	write_lock(&vfspag->lock);
+	list_del_init(&vtoken->link);
+	write_unlock(&vfspag->lock);
+
+	vfs_token_put(vtoken);
+
+} /* end vfs_pag_withdraw_token() */
+
+EXPORT_SYMBOL(vfs_pag_withdraw_token);
+
+/*****************************************************************************/
+/*
+ * withdraw all tokens for the named filesystem from the current PAG
+ */
+void vfs_unpag(const char *fsname)
+{
+	struct list_head *_n, *_p;
+	struct vfs_token *vtoken;
+	struct vfs_pag *vfspag = current->vfspag;
+
+	if (!vfspag)
+		return;
+
+	write_lock(&vfspag->lock);
+
+	list_for_each_safe(_p,_n,&vfspag->tokens) {
+		vtoken = list_entry(_p,struct vfs_token,link);
+
+		if (strcmp(fsname,vtoken->blob)==0) {
+			list_del_init(&vtoken->link);
+			vfs_token_put(vtoken);
+		}
+	}
+
+	write_unlock(&vfspag->lock);
+
+} /* end vfs_unpag() */
diff -uNr linux-2.5.69/kernel/fork.c linux-2.5.69-cred/kernel/fork.c
--- linux-2.5.69/kernel/fork.c	2003-05-06 15:07:12.000000000 +0100
+++ linux-2.5.69-cred/kernel/fork.c	2003-05-13 10:39:37.000000000 +0100
@@ -884,6 +884,9 @@
 
 	if (clone_flags & CLONE_CHILD_SETTID)
 		p->set_child_tid = child_tidptr;
+
+	if (p->vfspag) vfs_pag_get(p->vfspag);
+
 	/*
 	 * Clear TID on mm_release()?
 	 */
diff -uNr linux-2.5.69/kernel/Makefile linux-2.5.69-cred/kernel/Makefile
--- linux-2.5.69/kernel/Makefile	2003-05-06 15:04:56.000000000 +0100
+++ linux-2.5.69-cred/kernel/Makefile	2003-05-13 10:45:27.000000000 +0100
@@ -3,7 +3,7 @@
 #
 
 obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
-	    exit.o itimer.o time.o softirq.o resource.o \
+	    cred.o exit.o itimer.o time.o softirq.o resource.o \
 	    sysctl.o capability.o ptrace.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o futex.o pid.o \
 	    rcupdate.o intermodule.o extable.o params.o posix-timers.o


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] in-core AFS multiplexor and PAG support
  2003-05-13 17:20       ` Jan Harkes
  2003-05-13 18:21         ` David Howells
@ 2003-05-17 12:30         ` Pavel Machek
  1 sibling, 0 replies; 21+ messages in thread
From: Pavel Machek @ 2003-05-17 12:30 UTC (permalink / raw)
  To: Linus Torvalds, David Howells, linux-kernel, linux-fsdevel,
	openafs-devel

Hi!

> > The advantage of associating the PAG with the real uid rather than make it
> > per-process is that it's a lot easier to administer that way, I think. You
> > don't need to log out or anything like that to have changes take effect
> > for your session, and it is very natural to say "this user now gets key
> > X". Which is what I think you really want when you do something like enter
> > a key to an encrypted filesystem, for example.
> 
> The local user id is not a 'trusted' identity for a distributed filesystem.
> Any user still have to prove his identity by obtaining tokens. 
> 
> If someone obtains my user id on in any way (i.e. weak password/
> bufferoverflow/ root exploit), he should not be allowed to use or access
> my tokens as he hasn't proven his identity. In this case he would either

? If he has same uid as you *and* you
have >=1 process running, what prevents
him from gdb attach <that process>,
and force it to do whatever he needs
by forcing syscall?
				Pavel
-- 
				Pavel
Written on sharp zaurus, because my Velo1 broke. If you have Velo you don't need...


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] in-core AFS multiplexor and PAG support
  2003-05-13 17:23       ` Trond Myklebust
@ 2003-05-15 11:41         ` Ingo Oeser
  0 siblings, 0 replies; 21+ messages in thread
From: Ingo Oeser @ 2003-05-15 11:41 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: Linus Torvalds, David Howells, linux-kernel, linux-fsdevel,
	openafs-devel

On Tue, May 13, 2003 at 07:23:27PM +0200, Trond Myklebust wrote:
> Under normal circumstances, changing real uid/gid should involve
> changing your PAG, but it doesn't necessarily have to do so.

It should not change your PAG, because otherwise a key principle
of PAGs (root is not able to get into a PAG with "su $UID") is lost.

Regards

Ingo Oeser

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] in-core AFS multiplexor and PAG support
  2003-05-13 16:12   ` David Howells
@ 2003-05-13 20:23     ` Christoph Hellwig
  0 siblings, 0 replies; 21+ messages in thread
From: Christoph Hellwig @ 2003-05-13 20:23 UTC (permalink / raw)
  To: David Howells
  Cc: Christoph Hellwig, David Howells, torvalds, linux-kernel,
	linux-fsdevel, openafs-devel

On Tue, May 13, 2003 at 05:12:36PM +0100, David Howells wrote:
> At what level? There're two levels of mux...
> 
>  (1) four syscalls
> 
>  (2) 30-ish pioctls, 40-ish control functions and 20-ish ICL functions.
> 
> If I go for (2) as I think you're suggesting, that's on the order of at least
> 90 new syscalls, probably more when Arla's additions are included...

As a first patch two pag syscalls.  After that please post an implementation
for every additional syscall you need to this list (or groups thereof) after
you moved most to an afsctl fs.


> > and do you really think this is a 2.6 thing?
> 
> Yes.

Care to explain why?  


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] in-core AFS multiplexor and PAG support
       [not found] <20030513182950.GB30766@delft.aura.cs.cmu.edu>
@ 2003-05-13 18:53 ` David Howells
  0 siblings, 0 replies; 21+ messages in thread
From: David Howells @ 2003-05-13 18:53 UTC (permalink / raw)
  To: Jan Harkes
  Cc: David Howells, Linus Torvalds, linux-kernel, linux-fsdevel,
	openafs-devel


> On Tue, May 13, 2003 at 06:42:49PM +0100, David Howells wrote:
> > I have a further suggestion for some new system calls specifically for
> > managing tokens. I'd like to add the following:
> > 
> >  (1) settok(const char *fs, const char *key, size_t size, const void *data)
> > 
> >      Present data to the named filesystem as being the authentication
> >      token for the specified key (eg: an AFS cell). If accepted, this
> >      token should be stored in the PAG to which the calling process
> >      belongs.
> 
> Tokens are very filesystem specific, with Coda the kernel really doesn't
> know in which 'realm' any object is located. So it will always just pass
> the PAG to the cache manager which will associate the right token to
> access the file.

OTOH, since settok wouldn't modify the PAG ring directly, but would rather
jump through a vector in the file_system_type object, it could deside to
confer with userspace and possibly not add a token at all.

Of course then the other calls would also have to vector through the
file_system_type object rather than affecting the PAG directly, but I'm happy
to do that.

> A minimal kernel implementation really only has a single newpag()
> systemcall and internal 'getpag()' that can be used by filesystems. All
> the token handling can be done by a (possibly filesystem specific)
> userspace daemon that is given a (pag,realm/key) tuple and returns a
> token.
>
> But ofcourse Coda is a 99% userspace implementation, so I'm already
> assuming that there are one or more userspace daemons.

This would just be a "standardised" way of communicating authentication info
to a filesystem. What the filesystem then did with it would be up to that
filesystem. Storing a token in a PAG or attached to a struct file would then
just be one option.
 
> >  (2) gettok(const char *fs, const char *key, size_t size, void *buffer)
> > 
> >      Get a copy of an authentication token.
> 
> Not sure what the use of this is for userspace. I can see how your
> kernel module would use it.

OpenAFS has it, but I'm not sure what uses it.

> btw. is the 'size_t size' the size of the key, or the size of the buffer?

Actually the buffer size. I was assuming the key would be a NUL terminated
string, but it's probably best not to assume that, and I should add an extra
argument for the key size.

Filesystem name can remain a NUL terminated string as I'd have to search the
file_system_type structures.

> >  (4) cleartoks(const char *fs)
> 
> Isn't this simply deltok(fs, NULL)?

Probably.

David

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] in-core AFS multiplexor and PAG support
  2003-05-13 17:20       ` Jan Harkes
@ 2003-05-13 18:21         ` David Howells
  2003-05-17 12:30         ` Pavel Machek
  1 sibling, 0 replies; 21+ messages in thread
From: David Howells @ 2003-05-13 18:21 UTC (permalink / raw)
  To: Jan Harkes, Linus Torvalds, David Howells, linux-kernel,
	linux-fsdevel, openafs-devel


> Right, if some process/user opens a file and then passes the descriptor
> to another process/user which closes it. The close should operate under
> the same permissions as the original opener.

As long as the token isn't explicitly withdrawn. With my token structure, I've
defined it such that if the list_head in the token struct is ever empty, then
the token is withdrawn.

Furthermore, I'm considering it such that the the filesystem will select a
token from the PAG's token ring in the file_operations->open method and will
attach it to the file->f_token at that point for quick reference later.

> If someone obtains my user id on in any way (i.e. weak password/
> bufferoverflow/ root exploit), he should not be allowed to use or access
> my tokens as he hasn't proven his identity. In this case he would either
> still be in his original process authentication group, or a new and
> empty PAG. But definitely not in any of my authentication groups.
> 
> Which is also why joining a PAG should never be allowed.

Someone asked for it, but I suspect if allowed at all it may be best that this
ability is governed by its own capability bit and also that the security
interface should be consulted.

David

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] in-core AFS multiplexor and PAG support
  2003-05-13 16:47     ` Linus Torvalds
  2003-05-13 17:20       ` Jan Harkes
  2003-05-13 17:23       ` Trond Myklebust
@ 2003-05-13 17:42       ` David Howells
  2 siblings, 0 replies; 21+ messages in thread
From: David Howells @ 2003-05-13 17:42 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: David Howells, linux-kernel, linux-fsdevel, openafs-devel


Linux Torvalds wrote:
> I htink the table-driven approach is just generally pretty nasty, even if
> it's clever and concise. I personally prefer a table of function pointers 
> over a table of constraints.

I assume you're talking about the AFS mux. I'm dropping that for the moment as
it's provoking a touch of dissatisfaction.

> The Linux way of doing things is to have a per-object "operations
> structure", which is not an anonymous table (array of identical entries),

I'd prefer to add a pioctl method to inode_operations, but I don't think Al
Viro would go for it.

I'd also like to totally redefine the AFS syscall interface (which is _very_
messy as you can see), but that would involve changing both OpenAFS and Arla
userpsaces as well... Oh well, I'll save the flamebait for another day
(wouldn't want Christoph to get bored now:-).

> > Maybe... There are arguments either way, but if the token ring is kept in
> > struct user, a task can't detach from it and pass a token-less set of keys
> > onto another process it wants to run.
> 
> Oh, but it can.

Your suggesting having multiple user structs with the same UID?

> The pointer should be kept at two places: the "struct task_struct" contains
> the initial pre-process value, and at file open time that pointer should get
> copied (and the user count incremented) and put into the "struct file".

The pointer to what? The user struct? I've put a pointer to the PAG struct in
the task_struct, and a pointer to the particular token governing that file
access point in struct file. I did this because I don't want to have to search
the token ring every time readpage is called for instance.

> So you can pass the set of keys the way UNIX _always_ passes rights - 
> through the file descriptor.

Surely you'd only want the minimum set of credentials (preferably a single
token) attached to the file descriptor rather than the whole ring to avoid
having to needlessly repeat searches.

> > Also, using a separate PAG structure means that you can lend your keys to
> > an SUID program and conversely it means a SUID program can't so easily
> > gain access to keys it didn't inherit from its caller.
> 
> "task->user" always follows uid ("real uid"), and as such you can always
> switch back and forth by just changing uid.

So anyone who has the ability to get root on a box can immediately use other
peoples keys with su... OTOH, the ability to get root would normally permit
someone sufficiently motivated to get this anyway.

> The advantage of associating the PAG with the real uid rather than make it
> per-process is that it's a lot easier to administer that way, I think. You
> don't need to log out or anything like that to have changes take effect
> for your session, and it is very natural to say "this user now gets key
> X". Which is what I think you really want when you do something like enter
> a key to an encrypted filesystem, for example.

It's not per-process. If a process calls setpag(-1) to create a new PAG (the
sort of thing PAM would do for login), then all processes that it creates and
their children, and their children's children, etc. would share in that PAG
unless they themselves called setpag() to leave it.

So you wouldn't have to log out for an additional token to take effect.

Normally a PAG would be coincident with a login session. It might be worth
making SUID programs automatically invest in an empty PAG, but with a way to
revert to the original PAG.

> Put another way: you'd usually add the PAG's at filesystem _mount_ time,
> no? And at that point you'd usually want to add it "retroactively" to the 
> session processes that caused the mount to happen, no? Not just to the 
> children of the mount.

You'd want to be able to do it before, during and/or after. PAGs permit this.

I have a further suggestion for some new system calls specifically for
managing tokens. I'd like to add the following:

 (1) settok(const char *fs, const char *key, size_t size, const void *data)

     Present data to the named filesystem as being the authentication token
     for the specified key (eg: an AFS cell). If accepted, this token should
     be stored in the PAG to which the calling process belongs.

 (2) gettok(const char *fs, const char *key, size_t size, void *buffer)

     Get a copy of an authentication token.

 (3) deltok(const char *fs, const char *key)

     Delete an authentication token.

 (4) cleartoks(const char *fs)

     Clear all the tokens belonging to a particular filesystem.

These would work by calling operations in struct file_system_type for (1) at
least and either doing that for (2) - (4) or by dealing with the PAG (or
whatever) directly.

David

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] in-core AFS multiplexor and PAG support
  2003-05-13 16:47     ` Linus Torvalds
  2003-05-13 17:20       ` Jan Harkes
@ 2003-05-13 17:23       ` Trond Myklebust
  2003-05-15 11:41         ` Ingo Oeser
  2003-05-13 17:42       ` David Howells
  2 siblings, 1 reply; 21+ messages in thread
From: Trond Myklebust @ 2003-05-13 17:23 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: David Howells, linux-kernel, linux-fsdevel, openafs-devel

>>>>> " " == Linus Torvalds <torvalds@transmeta.com> writes:

     > Put another way: you'd usually add the PAG's at filesystem
     > _mount_ time, no? And at that point you'd usually want to add
     > it "retroactively" to the session processes that caused the
     > mount to happen, no? Not just to the children of the mount.

Think of PAGs as "session"-style management of credentials. If you
want to add/remove a credential without sharing that operation with
all the other processes that are currently in the same session/PAG
then you change PAGs.
Otherwise, the credential operation affects all processes in the same
PAG.

Under normal circumstances, changing real uid/gid should involve
changing your PAG, but it doesn't necessarily have to do so.

Cheers,
  Trond

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] in-core AFS multiplexor and PAG support
  2003-05-13 16:47     ` Linus Torvalds
@ 2003-05-13 17:20       ` Jan Harkes
  2003-05-13 18:21         ` David Howells
  2003-05-17 12:30         ` Pavel Machek
  2003-05-13 17:23       ` Trond Myklebust
  2003-05-13 17:42       ` David Howells
  2 siblings, 2 replies; 21+ messages in thread
From: Jan Harkes @ 2003-05-13 17:20 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: David Howells, linux-kernel, linux-fsdevel, openafs-devel

On Tue, May 13, 2003 at 09:47:59AM -0700, Linus Torvalds wrote:
> > Maybe... There are arguments either way, but if the token ring is kept in
> > struct user, a task can't detach from it and pass a token-less set of keys
> > onto another process it wants to run.
> 
> Oh, but it can. The pointer should be kept at two places: the "struct 
> task_struct" contains the initial pre-process value, and at file open time 
> that pointer should get copied (and the user count incremented) and put 
> into the "struct file". 
> 
> So you can pass the set of keys the way UNIX _always_ passes rights - 
> through the file descriptor. 

Right, if some process/user opens a file and then passes the descriptor
to another process/user which closes it. The close should operate under
the same permissions as the original opener.

Simple example when this happens is when a setuid application is started
and the stdout/stderr is redirected to a file. 'ping foo >out 2>&1'. The
shell opens the file under my uid, but the setuid application closes it
under the new uid.

> The advantage of associating the PAG with the real uid rather than make it
> per-process is that it's a lot easier to administer that way, I think. You
> don't need to log out or anything like that to have changes take effect
> for your session, and it is very natural to say "this user now gets key
> X". Which is what I think you really want when you do something like enter
> a key to an encrypted filesystem, for example.

The local user id is not a 'trusted' identity for a distributed filesystem.
Any user still have to prove his identity by obtaining tokens. 

If someone obtains my user id on in any way (i.e. weak password/
bufferoverflow/ root exploit), he should not be allowed to use or access
my tokens as he hasn't proven his identity. In this case he would either
still be in his original process authentication group, or a new and
empty PAG. But definitely not in any of my authentication groups.

Which is also why joining a PAG should never be allowed.

Any arguments about 'it avoids the cost of obtaining credentials' are
stupid because that cost is exactly what it takes to prove that a new
session is in fact associated with a specific user. If our identity
already was proven beyond reasonable doubt, we clearly already have our
'token' and the additional cost to associate this with the new PAG is
zero.

Jan


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] in-core AFS multiplexor and PAG support
  2003-05-13 16:39   ` Jeff Garzik
@ 2003-05-13 16:57     ` David Howells
  0 siblings, 0 replies; 21+ messages in thread
From: David Howells @ 2003-05-13 16:57 UTC (permalink / raw)
  To: Jeff Garzik
  Cc: Christoph Hellwig, David Howells, torvalds, linux-kernel,
	linux-fsdevel, openafs-devel


> AFS is annoying and painful no matter how you look at it.  :/
> 
> But I don't think 90+ new syscalls is the answer, even for 2.7.

I think it's going to be either

 (1) try to maintain compatibility with OpenAFS and Arla's current syscall
     setup

 (2) totally rewrite the interface and tell OpenAFS/Arla they have to change
     too.

In the case of (2), I think the AFS operations would best be emulated by a
combination of the following means:

 (1) Use the setpag() syscall in my patch.

 (2) Add syscalls for managing tokens on a general filesystem-by-filesystem
     basis (only need four ops: set, get, delete and clear-all). These could
     work through operations in struct file_system_type.

 (3) Work through sysfs files for fs-specific control functions. Things like
     adding cells would come into this category.

 (4) Emulate as much as many of the inode-requiring pioctl calls as possible
     with xattr syscalls.

However, this leaves at least one that doesn't fit into any of the above
categories. VIOC_STAT_MT_PT takes an inode, and so should come into (4) except
that the xattr key size isn't sufficiently capacious.

There are two ways to deal with this:

 (1) Add an actual pioctl syscall as a top-level syscall and make it either
     call a pioctl method in inode_operations or maybe have it fake a dentry
     and file and call file_operations->ioctl.

 (2) Open the directory holding the mountpoint and make an ioctl that aims at
     the mountpoint in question.

David

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] in-core AFS multiplexor and PAG support
  2003-05-13 15:44   ` Alan Cox
  2003-05-13 16:52     ` Jan Harkes
@ 2003-05-13 16:57     ` Linus Torvalds
  2003-05-13 16:39       ` Alan Cox
  1 sibling, 1 reply; 21+ messages in thread
From: Linus Torvalds @ 2003-05-13 16:57 UTC (permalink / raw)
  To: Alan Cox
  Cc: David Howells, Linux Kernel Mailing List, linux-fsdevel, openafs-devel


On 13 May 2003, Alan Cox wrote:
>
> With something like SELinux a PAG may belong to a role not to a user
> even though other limits like processes probably belong to the user as a
> whole. 

Hmm.. That doesn't make a lot of sense to me.

A "user" is by definition what the unix filesystem considers to be the
"atom of security". In fact, a "user" has no other meaning - except for
the notion of "root", which is obviously special and has meaning outside
of the scope of filesystems (and even here capabilities have tried to
separate out that meaning from the "user" definition).

But if we want to split up users into "roles", then sure, we can have a
"role" that is shared across processes. But I think that for _usability_
we really want that to be _shared_ by default, and anybody who wants to
split it should have to work at it. Exactly so that when you log in, and
use your private key to mount some encrypted volume, _all_ your processes
should by default get access to it. Even if the other ones were
independent logins (another window with another ssh session to that
machine).

In other words: I really think usability should count very high on the 
list of requirements. Much higher than SELinux.

		Linus


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] in-core AFS multiplexor and PAG support
  2003-05-13 15:44   ` Alan Cox
@ 2003-05-13 16:52     ` Jan Harkes
  2003-05-13 16:57     ` Linus Torvalds
  1 sibling, 0 replies; 21+ messages in thread
From: Jan Harkes @ 2003-05-13 16:52 UTC (permalink / raw)
  To: Linux Kernel Mailing List, linux-fsdevel

On Tue, May 13, 2003 at 04:44:24PM +0100, Alan Cox wrote:
> On Maw, 2003-05-13 at 16:52, Linus Torvalds wrote:
> > I think the code looks pretty horrible, but I think we'll need something
> > like this to keep track of keys. However, I'm not sure we should make this
> > a new structure - I think we should make the current "tsk->user" thing
> > _be_ the "PAG". 
> 
> With something like SELinux a PAG may belong to a role not to a user
> even though other limits like processes probably belong to the user as a
> whole. 
> 
> How does AFS currently handle this, can two logins of the same user have
> seperate PAGs ?

Yes, easily and it is useful.

I can start an xterm a new PAG that has administrative rights. Or
various system daemons all running with uid 0, but with different access
rights (sendmail/webserver).

It also goes the other way around, different uid's using the same PAG,
if I run a setuid application it can still access my files.

Jan


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] in-core AFS multiplexor and PAG support
  2003-05-13 16:05   ` David Howells
@ 2003-05-13 16:47     ` Linus Torvalds
  2003-05-13 17:20       ` Jan Harkes
                         ` (2 more replies)
  0 siblings, 3 replies; 21+ messages in thread
From: Linus Torvalds @ 2003-05-13 16:47 UTC (permalink / raw)
  To: David Howells; +Cc: David Howells, linux-kernel, linux-fsdevel, openafs-devel


On Tue, 13 May 2003, David Howells wrote:
> > 
> > I think the code looks pretty horrible,
> 
> Any particular bits?

I htink the table-driven approach is just generally pretty nasty, even if
it's clever and concise. I personally prefer a table of function pointers 
over a table of constraints.

Right now you have one table of constraints, and then later on when you
actually implement the code that _does_ anything, you'll end up having
another table (or switch statment) per filesystem of actual actions.  And
neither with any kind of compile-time type checking, since the actual
interfaces to pass stuff around are just extended ioctls.

The Linux way of doing things is to have a per-object "operations
structure", which is not an anonymous table (array of identical entries),
but a list of unique entries (a structure of strongly typed function
pointers). And then you never de-multiplex (which inherently loses type
information), or you at least do it only _once_ and make sure that you
stronly type things at that point so that any downstream stuff is typed.

> Maybe... There are arguments either way, but if the token ring is kept in
> struct user, a task can't detach from it and pass a token-less set of keys
> onto another process it wants to run.

Oh, but it can. The pointer should be kept at two places: the "struct 
task_struct" contains the initial pre-process value, and at file open time 
that pointer should get copied (and the user count incremented) and put 
into the "struct file". 

So you can pass the set of keys the way UNIX _always_ passes rights - 
through the file descriptor. 

> Also, using a separate PAG structure means that you can lend your keys to an
> SUID program and conversely it means a SUID program can't so easily gain
> access to keys it didn't inherit from its caller.

"task->user" always follows uid ("real uid"), and as such you can always
switch back and forth by just changing uid.

The advantage of associating the PAG with the real uid rather than make it
per-process is that it's a lot easier to administer that way, I think. You
don't need to log out or anything like that to have changes take effect
for your session, and it is very natural to say "this user now gets key
X". Which is what I think you really want when you do something like enter
a key to an encrypted filesystem, for example.

Put another way: you'd usually add the PAG's at filesystem _mount_ time,
no? And at that point you'd usually want to add it "retroactively" to the 
session processes that caused the mount to happen, no? Not just to the 
children of the mount.

			Linus


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] in-core AFS multiplexor and PAG support
  2003-05-13 16:03 ` Christoph Hellwig
  2003-05-13 16:12   ` David Howells
@ 2003-05-13 16:39   ` Jeff Garzik
  2003-05-13 16:57     ` David Howells
  1 sibling, 1 reply; 21+ messages in thread
From: Jeff Garzik @ 2003-05-13 16:39 UTC (permalink / raw)
  To: Christoph Hellwig, David Howells, torvalds, linux-kernel,
	linux-fsdevel, openafs-devel

On Tue, May 13, 2003 at 05:03:17PM +0100, Christoph Hellwig wrote:
> On Tue, May 13, 2003 at 04:39:20PM +0100, David Howells wrote:
> >  (3) AFS multiplexor support. Not complete at the moment, but implemented far
> >      enough to provide access to the PAG mechanism. Further patches will be
> >      forthcoming to make this fully functional.
> 
> This is broken.  Please add individual syscalls instead of yet another broken
> multiplexer.

AFS is annoying and painful no matter how you look at it.  :/

But I don't think 90+ new syscalls is the answer, even for 2.7.

File this under "design mistake we must live with", much like
ioctl(2) itself.

	Jeff




^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] in-core AFS multiplexor and PAG support
  2003-05-13 16:57     ` Linus Torvalds
@ 2003-05-13 16:39       ` Alan Cox
  0 siblings, 0 replies; 21+ messages in thread
From: Alan Cox @ 2003-05-13 16:39 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: David Howells, Linux Kernel Mailing List, linux-fsdevel, openafs-devel

On Maw, 2003-05-13 at 17:57, Linus Torvalds wrote:
> A "user" is by definition what the unix filesystem considers to be the
> "atom of security". In fact, a "user" has no other meaning - except for
> the notion of "root", which is obviously special and has meaning outside
> of the scope of filesystems (and even here capabilities have tried to
> separate out that meaning from the "user" definition).

This has been untrue since the security hooks went in, and in some
senses before that (capabilities mean multiple "root"'s)

> "role" that is shared across processes. But I think that for _usability_
> we really want that to be _shared_ by default, and anybody who wants to
> split it should have to work at it. Exactly so that when you log in, and
> use your private key to mount some encrypted volume, _all_ your processes
> should by default get access to it. Even if the other ones were
> independent logins (another window with another ssh session to that
> machine).
> 
> In other words: I really think usability should count very high on the 
> list of requirements. Much higher than SELinux.

Sounds right. PAM can certainly do the splitting providing the functionality
is there to do it


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] in-core AFS multiplexor and PAG support
  2003-05-13 16:03 ` Christoph Hellwig
@ 2003-05-13 16:12   ` David Howells
  2003-05-13 20:23     ` Christoph Hellwig
  2003-05-13 16:39   ` Jeff Garzik
  1 sibling, 1 reply; 21+ messages in thread
From: David Howells @ 2003-05-13 16:12 UTC (permalink / raw)
  To: Christoph Hellwig, David Howells, torvalds, linux-kernel,
	linux-fsdevel, openafs-devel

> On Tue, May 13, 2003 at 04:39:20PM +0100, David Howells wrote:
> >  (3) AFS multiplexor support. Not complete at the moment, but implemented
> >      far enough to provide access to the PAG mechanism. Further patches
> >      will be forthcoming to make this fully functional.
> 
> This is broken.  Please add individual syscalls instead of yet another broken
> multiplexer.

At what level? There're two levels of mux...

 (1) four syscalls

 (2) 30-ish pioctls, 40-ish control functions and 20-ish ICL functions.

If I go for (2) as I think you're suggesting, that's on the order of at least
90 new syscalls, probably more when Arla's additions are included...

> and do you really think this is a 2.6 thing?

Yes.

David

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] in-core AFS multiplexor and PAG support
  2003-05-13 15:52 ` Linus Torvalds
  2003-05-13 15:44   ` Alan Cox
@ 2003-05-13 16:05   ` David Howells
  2003-05-13 16:47     ` Linus Torvalds
  1 sibling, 1 reply; 21+ messages in thread
From: David Howells @ 2003-05-13 16:05 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: David Howells, linux-kernel, linux-fsdevel, openafs-devel


Linus Torvalds wrote:
> On Tue, 13 May 2003, David Howells wrote:
> > 
> >  (1) PAG (Process Authentication Group) support. A PAG is ID'd by a unique
> >      number, and is represented in memory as a structure that has a ring of
> >      associated authentication tokens.
> > 
> >      Each process can either be part of a PAG, or it can PAG-less - in
> >      which case it has no authentication tokens.
> > 
> >      Two new syscalls are added: setpag and getpag.
> 
> I think the code looks pretty horrible,

Any particular bits?

> but I think we'll need something like this to keep track of keys. However,
> I'm not sure we should make this a new structure - I think we should make
> the current "tsk->user" thing _be_ the "PAG".

Maybe... There are arguments either way, but if the token ring is kept in
struct user, a task can't detach from it and pass a token-less set of keys
onto another process it wants to run.

Also, using a separate PAG structure means that you can lend your keys to an
SUID program and conversely it means a SUID program can't so easily gain
access to keys it didn't inherit from its caller.

I'm not sure that the ability to arbitrarily join a PAG should be permitted,
but it was requested.

David

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] in-core AFS multiplexor and PAG support
  2003-05-13 15:39 David Howells
  2003-05-13 15:52 ` Linus Torvalds
@ 2003-05-13 16:03 ` Christoph Hellwig
  2003-05-13 16:12   ` David Howells
  2003-05-13 16:39   ` Jeff Garzik
  1 sibling, 2 replies; 21+ messages in thread
From: Christoph Hellwig @ 2003-05-13 16:03 UTC (permalink / raw)
  To: David Howells; +Cc: torvalds, linux-kernel, linux-fsdevel, openafs-devel

On Tue, May 13, 2003 at 04:39:20PM +0100, David Howells wrote:
>  (3) AFS multiplexor support. Not complete at the moment, but implemented far
>      enough to provide access to the PAG mechanism. Further patches will be
>      forthcoming to make this fully functional.

This is broken.  Please add individual syscalls instead of yet another broken
multiplexer.

and do you really think this is a 2.6 thing?


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] in-core AFS multiplexor and PAG support
  2003-05-13 15:39 David Howells
@ 2003-05-13 15:52 ` Linus Torvalds
  2003-05-13 15:44   ` Alan Cox
  2003-05-13 16:05   ` David Howells
  2003-05-13 16:03 ` Christoph Hellwig
  1 sibling, 2 replies; 21+ messages in thread
From: Linus Torvalds @ 2003-05-13 15:52 UTC (permalink / raw)
  To: David Howells; +Cc: linux-kernel, linux-fsdevel, openafs-devel


On Tue, 13 May 2003, David Howells wrote:
> 
>  (1) PAG (Process Authentication Group) support. A PAG is ID'd by a unique
>      number, and is represented in memory as a structure that has a ring of
>      associated authentication tokens.
> 
>      Each process can either be part of a PAG, or it can PAG-less - in which
>      case it has no authentication tokens.
> 
>      Two new syscalls are added: setpag and getpag.

I think the code looks pretty horrible, but I think we'll need something
like this to keep track of keys. However, I'm not sure we should make this
a new structure - I think we should make the current "tsk->user" thing
_be_ the "PAG". 

		Linus


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] in-core AFS multiplexor and PAG support
  2003-05-13 15:52 ` Linus Torvalds
@ 2003-05-13 15:44   ` Alan Cox
  2003-05-13 16:52     ` Jan Harkes
  2003-05-13 16:57     ` Linus Torvalds
  2003-05-13 16:05   ` David Howells
  1 sibling, 2 replies; 21+ messages in thread
From: Alan Cox @ 2003-05-13 15:44 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: David Howells, Linux Kernel Mailing List, linux-fsdevel, openafs-devel

On Maw, 2003-05-13 at 16:52, Linus Torvalds wrote:
> I think the code looks pretty horrible, but I think we'll need something
> like this to keep track of keys. However, I'm not sure we should make this
> a new structure - I think we should make the current "tsk->user" thing
> _be_ the "PAG". 

With something like SELinux a PAG may belong to a role not to a user
even though other limits like processes probably belong to the user as a
whole. 

How does AFS currently handle this, can two logins of the same user have
seperate PAGs ?


^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH] in-core AFS multiplexor and PAG support
@ 2003-05-13 15:39 David Howells
  2003-05-13 15:52 ` Linus Torvalds
  2003-05-13 16:03 ` Christoph Hellwig
  0 siblings, 2 replies; 21+ messages in thread
From: David Howells @ 2003-05-13 15:39 UTC (permalink / raw)
  To: torvalds; +Cc: linux-kernel, linux-fsdevel, openafs-devel


Hi Linus,

Here's a patch to add three things that are required for AFS and that may be
of use to other stuff (such as NFSv4 and Samba):

 (1) PAG (Process Authentication Group) support. A PAG is ID'd by a unique
     number, and is represented in memory as a structure that has a ring of
     associated authentication tokens.

     Each process can either be part of a PAG, or it can PAG-less - in which
     case it has no authentication tokens.

     Two new syscalls are added: setpag and getpag.

 (2) Authentication token support. An authentication token is a blob of binary
     data that is keyed by filesystem name and fs-specific key (such as AFS
     cell name or SMB workgroup).

     These are retained in two places: each PAG has a ring of tokens
     appropriate to the group of processes within that PAG; and each struct
     file has a pointer to the single token governing that file (if there is
     one).

 (3) AFS multiplexor support. Not complete at the moment, but implemented far
     enough to provide access to the PAG mechanism. Further patches will be
     forthcoming to make this fully functional.

It is my intention to add Trond's vfs_cred stuff in at some point, but that
has a lot greater impact than this patch (which has negligible impact).

David

diff -uNr linux-2.5.69/arch/i386/kernel/entry.S linux-2.5.69-cred/arch/i386/kernel/entry.S
--- linux-2.5.69/arch/i386/kernel/entry.S	2003-05-06 15:06:47.000000000 +0100
+++ linux-2.5.69-cred/arch/i386/kernel/entry.S	2003-05-13 11:16:17.000000000 +0100
@@ -721,7 +721,7 @@
 	.long sys_bdflush
 	.long sys_sysfs		/* 135 */
 	.long sys_personality
-	.long sys_ni_syscall	/* reserved for afs_syscall */
+	.long sys_afs
 	.long sys_setfsuid16
 	.long sys_setfsgid16
 	.long sys_llseek	/* 140 */
@@ -852,6 +852,7 @@
  	.long sys_clock_gettime		/* 265 */
  	.long sys_clock_getres
  	.long sys_clock_nanosleep
- 
+	.long sys_setpag
+	.long sys_getpag
  
 nr_syscalls=(.-sys_call_table)/4
diff -uNr linux-2.5.69/fs/afssyscall.c linux-2.5.69-cred/fs/afssyscall.c
--- linux-2.5.69/fs/afssyscall.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.5.69-cred/fs/afssyscall.c	2003-05-13 16:02:47.000000000 +0100
@@ -0,0 +1,223 @@
+/* afssyscall.c: AFS system call multiplexor
+ *
+ * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/afs_syscall.h>
+#include <linux/linkage.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/cred.h>
+#include <asm/uaccess.h>
+
+static const struct {
+	u_int8_t	flags;
+#define VIOC_PATH_YES		0x01	/* must be a path */
+#define VIOC_PATH_NO		0x02	/* must not be a path */
+#define VIOC_FOLLOW_OPT		0x00	/* followsymlinks can be anything */
+#define VIOC_FOLLOW_NO		0x04	/* followsymlinks must be false */
+#define VIOC_IN_OPT		0x00	/* input data optional */
+#define VIOC_IN_YES		0x08	/* input data required */
+#define VIOC_IN_NO		0x10	/* input data prohibited */
+#define VIOC_OUT_OPT		0x00	/* output buffer optional */  
+#define VIOC_OUT_YES		0x20	/* output buffer required */  
+#define VIOC_OUT_NO		0x40	/* output buffer prohibited */
+#define VIOC_VALID		0x80	/* entry is valid */
+
+} afs_vioc_table[] = {
+
+#define _v(NAME,P,F,I,O) [NAME] = \
+{ VIOC_VALID | VIOC_PATH_##P | VIOC_FOLLOW_##F | VIOC_IN_##I | VIOC_OUT_##O }
+
+	/* command name                  path folw  in  out */
+	_v(__VIOCSETAL			, YES, OPT, YES, NO ),
+	_v(__VIOCGETAL			, YES, OPT, NO , YES),
+	_v(__VIOCSETTOK			, NO , NO , YES, NO ),
+	_v(__VIOCGETVOLSTAT		, YES, OPT, NO , YES),
+	_v(__VIOCSETVOLSTAT		, YES, OPT, YES, NO ),
+	_v(__VIOCFLUSH			, YES, OPT, NO , NO ),
+	_v(__VIOCGETTOK			, NO , NO , YES, YES),
+	_v(__VIOCUNLOG			, NO , NO , NO , NO ),
+	_v(__VIOCCKSERV			, NO , NO , OPT, OPT),
+	_v(__VIOCCKBACK			, NO , NO , NO , NO ),
+	_v(__VIOCCKCONN			, NO , NO , NO , YES),
+	_v(__VIOCWHEREIS		, YES, OPT, NO , YES),
+	_v(__VIOCACCESS			, YES, OPT, YES, NO ),
+	_v(__VIOCUNPAG			, NO , NO , NO , NO ),
+	_v(__VIOCGETFID			, YES, OPT, NO , YES),
+	_v(__VIOCSETCACHESIZE		, NO , NO , YES, NO ),
+	_v(__VIOCFLUSHCB		, YES, OPT, NO , NO ),
+	_v(__VIOCNEWCELL		, NO , NO , YES, NO ),
+	_v(__VIOCGETCELL		, NO , NO , YES, YES),
+	_v(__VIOC_DELETE_MT_PT		, YES, NO , YES, NO ),
+	_v(__VIOC_STAT_MT_PT		, YES, OPT, YES, YES), /* !! can't use xattr */
+	_v(__VIOC_FILE_CELL_NAME	, YES, OPT, NO , YES),
+	_v(__VIOC_GET_WS_CELL		, NO , NO , NO , YES),
+	_v(__VIOC_AFS_MARINER_HOST	, NO , NO , YES, OPT),
+	_v(__VIOC_GET_PRIMARY_CELL	, NO , NO , NO , YES),
+	_v(__VIOC_VENUSLOG		, NO , NO , YES, OPT),
+	_v(__VIOC_GETCELLSTATUS		, NO , NO , YES, YES),
+	_v(__VIOC_SETCELLSTATUS		, NO , NO , YES, NO ),
+	_v(__VIOC_FLUSHVOLUME		, YES, OPT, NO , NO ),
+	_v(__VIOC_AFS_SYSNAME		, NO , NO , YES, OPT),
+	_v(__VIOC_EXPORTAFS		, NO , NO , YES, OPT),
+	_v(__VIOCGETCACHEPARMS		, NO , NO , NO , YES),
+
+};
+
+static int vfs_pioctl(const char __user *path, long cmd, void __user *arg, int followsymlinks);
+static int vfs_pioctl_inode(struct nameidata *nd, long cmd, struct afs_ioctl *ioc);
+static int vfs_pioctl_fs(long cmd, struct afs_ioctl *ioc);
+
+/*****************************************************************************/
+/*
+ * AFS syscall multiplexor
+ */
+int sys_afs(long subsyscall, long p1, long p2, long p3, long p4, long p5)
+{
+	switch (subsyscall) {
+	case AFS_SYSCALL_PIOCTL:	return vfs_pioctl((const char*)p1,p2,(void*)p3,p4);
+	case AFS_SYSCALL_SETPAG:	return sys_setpag(-1);
+	case AFS_SYSCALL_CALL:		return -ENOSYS;
+	case AFS_SYSCALL_ICL:		return -ENOSYS;
+	default:			return -ENOSYS;
+	}
+
+} /* end sys_afs() */
+
+/*****************************************************************************/
+/*
+ * handle path-based ioctl
+ */
+static int vfs_pioctl(const char __user *path, long cmd, void __user *arg, int followsymlinks)
+{
+	struct afs_ioctl uioc, ioc;
+	struct nameidata nd;
+	void *data;
+	char flags;
+	int err;
+
+	if (_IOC_NR(cmd) < 0 || _IOC_NR(cmd) >= sizeof(afs_vioc_table)/sizeof(afs_vioc_table[0]))
+		return -EINVAL;
+	flags = afs_vioc_table[_IOC_NR(cmd)].flags;
+
+	if (!(flags & VIOC_VALID))
+		return -EINVAL;
+
+	if ((flags & VIOC_PATH_NO	&& path			) ||
+	    (flags & VIOC_PATH_YES	&& !path		) ||
+	    (flags & VIOC_FOLLOW_NO	&& followsymlinks	))
+		return -EINVAL;
+
+	if (copy_from_user(&uioc,arg,sizeof(uioc)) != 0)
+		return -EFAULT;
+
+	if ((flags & VIOC_IN_YES	&& !uioc.in	) ||
+	    (flags & VIOC_IN_NO		&& uioc.in	) ||
+	    (flags & VIOC_OUT_YES	&& !uioc.out	) ||
+	    (flags & VIOC_OUT_NO	&& uioc.out	))
+		return -EINVAL;
+
+	if (uioc.in_size<0)
+		ioc.in_size = 0;
+	else if (uioc.in_size > PAGE_SIZE/2)
+		return -E2BIG;
+	else
+		ioc.in_size = uioc.in_size;
+
+	if (uioc.out_size<0)
+		ioc.out_size = 0;
+	else if (uioc.out_size > PAGE_SIZE/2)
+		return -E2BIG;
+	else
+		ioc.out_size = uioc.out_size;
+
+	if ((flags & VIOC_IN_YES	&& ioc.in_size<=0	) ||
+	    (flags & VIOC_IN_NO		&& ioc.in_size>0	) ||
+	    (flags & VIOC_OUT_YES	&& ioc.out_size<=0	) ||
+	    (flags & VIOC_OUT_NO	&& ioc.out_size>0	))
+		return -EINVAL;
+
+	data = (void*) get_zeroed_page(GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	ioc.in	= ioc.in_size>0  ? data : NULL;
+	ioc.out	= ioc.out_size>0 ? data + PAGE_SIZE/2 : NULL;
+
+	if (ioc.in && copy_from_user(ioc.in,uioc.in,ioc.in_size) != 0) {
+		free_page((unsigned long)data);
+		return -EFAULT;
+	}
+
+	/* perform the appropriate action */
+	if (!path) {
+		err = vfs_pioctl_fs(cmd,&ioc);
+	}
+	else {
+		unsigned flags = followsymlinks ? LOOKUP_FOLLOW : 0;
+
+		err = __user_walk(path,flags,&nd);
+		if (!err) {
+			err = vfs_pioctl_inode(&nd,cmd,&ioc);
+			path_release(&nd);
+		}
+	}
+
+	/* write back the result, zero padding to the size of the buffer */
+	if (err==0 || uioc.out_size) {
+		if (copy_to_user(uioc.out,ioc.out,uioc.out_size) != 0)
+			err = -EFAULT;
+	}
+
+	free_page((unsigned long)data);
+	return err;
+
+} /* end vfs_pioctl() */
+
+/*****************************************************************************/
+/*
+ * pioctls that require an inode
+ */
+static int vfs_pioctl_inode(struct nameidata *nd, long cmd, struct afs_ioctl *ioc)
+{
+#if 0
+	printk("vfs_pioctl_inode(%s,%ld,{is=%hd,os=%hd})\n",
+	       nd->dentry->d_name.name,_IOC_NR(cmd),ioc->in_size,ioc->out_size);
+#endif
+
+	return -EINVAL; /* not yet complete */
+
+} /* end vfs_pioctl_inode() */
+
+/*****************************************************************************/
+/*
+ * pioctls that don't require an inode, but rather work on the FS as a whole
+ */
+static int vfs_pioctl_fs(long cmd, struct afs_ioctl *ioc)
+{
+#if 0
+	printk("vfs_pioctl_fs(%ld,{is=%hd,os=%hd})\n",
+	       _IOC_NR(cmd),ioc->in_size,ioc->out_size);
+#endif
+
+	switch (cmd) {
+	case VIOCUNPAG:
+	case VIOCUNLOG:
+		ioc->out_size = 0;
+		vfs_unpag("afs");
+		return 0;
+	default:
+		return -EINVAL; /* not yet complete */
+	}
+
+} /* end vfs_pioctl_fs() */
diff -uNr linux-2.5.69/fs/file_table.c linux-2.5.69-cred/fs/file_table.c
--- linux-2.5.69/fs/file_table.c	2003-05-06 15:04:45.000000000 +0100
+++ linux-2.5.69-cred/fs/file_table.c	2003-05-13 13:54:58.000000000 +0100
@@ -166,6 +166,7 @@
 	if (file->f_op && file->f_op->release)
 		file->f_op->release(inode, file);
 	security_file_free(file);
+	if (file->f_token) vfs_token_put(file->f_token);
 	fops_put(file->f_op);
 	if (file->f_mode & FMODE_WRITE)
 		put_write_access(inode);
diff -uNr linux-2.5.69/fs/Makefile linux-2.5.69-cred/fs/Makefile
--- linux-2.5.69/fs/Makefile	2003-05-06 15:04:46.000000000 +0100
+++ linux-2.5.69-cred/fs/Makefile	2003-05-13 11:18:20.000000000 +0100
@@ -10,7 +10,8 @@
 		namei.o fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \
 		dcache.o inode.o attr.o bad_inode.o file.o dnotify.o \
 		filesystems.o namespace.o seq_file.o xattr.o libfs.o \
-		fs-writeback.o mpage.o direct-io.o aio.o eventpoll.o
+		fs-writeback.o mpage.o direct-io.o aio.o eventpoll.o \
+		afssyscall.o
 
 obj-$(CONFIG_COMPAT) += compat.o
 
diff -uNr linux-2.5.69/fs/open.c linux-2.5.69-cred/fs/open.c
--- linux-2.5.69/fs/open.c	2003-05-06 15:04:45.000000000 +0100
+++ linux-2.5.69-cred/fs/open.c	2003-05-13 11:28:08.000000000 +0100
@@ -46,7 +46,7 @@
 	struct nameidata nd;
 	int error;
 
-	error = user_path_walk(path, &nd);
+	error = user_path_walk(path,&nd);
 	if (!error) {
 		struct statfs tmp;
 		error = vfs_statfs(nd.dentry->d_inode->i_sb, &tmp);
diff -uNr linux-2.5.69/fs/proc/array.c linux-2.5.69-cred/fs/proc/array.c
--- linux-2.5.69/fs/proc/array.c	2003-05-06 15:07:08.000000000 +0100
+++ linux-2.5.69-cred/fs/proc/array.c	2003-05-13 10:58:56.000000000 +0100
@@ -154,13 +154,14 @@
 	read_lock(&tasklist_lock);
 	buffer += sprintf(buffer,
 		"State:\t%s\n"
+		"Pag:\t%d\n"
 		"Tgid:\t%d\n"
 		"Pid:\t%d\n"
 		"PPid:\t%d\n"
 		"TracerPid:\t%d\n"
 		"Uid:\t%d\t%d\t%d\t%d\n"
 		"Gid:\t%d\t%d\t%d\t%d\n",
-		get_task_state(p), p->tgid,
+		get_task_state(p), p->vfspag ? p->vfspag->pag : 0, p->tgid,
 		p->pid, p->pid ? p->real_parent->pid : 0,
 		p->pid && p->ptrace ? p->parent->pid : 0,
 		p->uid, p->euid, p->suid, p->fsuid,
diff -uNr linux-2.5.69/include/asm-i386/posix_types.h linux-2.5.69-cred/include/asm-i386/posix_types.h
--- linux-2.5.69/include/asm-i386/posix_types.h	2003-05-06 15:04:37.000000000 +0100
+++ linux-2.5.69-cred/include/asm-i386/posix_types.h	2003-05-12 10:19:15.000000000 +0100
@@ -13,6 +13,7 @@
 typedef unsigned short	__kernel_nlink_t;
 typedef long		__kernel_off_t;
 typedef int		__kernel_pid_t;
+typedef int		__kernel_pag_t;
 typedef unsigned short	__kernel_ipc_pid_t;
 typedef unsigned short	__kernel_uid_t;
 typedef unsigned short	__kernel_gid_t;
diff -uNr linux-2.5.69/include/asm-i386/unistd.h linux-2.5.69-cred/include/asm-i386/unistd.h
--- linux-2.5.69/include/asm-i386/unistd.h	2003-05-06 15:04:37.000000000 +0100
+++ linux-2.5.69-cred/include/asm-i386/unistd.h	2003-05-13 10:47:59.000000000 +0100
@@ -273,8 +273,10 @@
 #define __NR_clock_gettime	(__NR_timer_create+6)
 #define __NR_clock_getres	(__NR_timer_create+7)
 #define __NR_clock_nanosleep	(__NR_timer_create+8)
+#define __NR_setpag		268
+#define __NR_getpag		269
 
-#define NR_syscalls 268
+#define NR_syscalls 270
 
 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
 
diff -uNr linux-2.5.69/include/linux/afs_syscall.h linux-2.5.69-cred/include/linux/afs_syscall.h
--- linux-2.5.69/include/linux/afs_syscall.h	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.5.69-cred/include/linux/afs_syscall.h	2003-05-13 13:30:15.000000000 +0100
@@ -0,0 +1,159 @@
+/* afs_syscall.h: AFS system call constants and structures
+ *
+ * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#ifndef _LINUX_AFS_SYSCALL_H
+#define _LINUX_AFS_SYSCALL_H
+
+/* level one multiplexor */
+#define AFS_SYSCALL_PIOCTL	20
+#define AFS_SYSCALL_SETPAG	21
+#define AFS_SYSCALL_CALL	28
+#define AFS_SYSCALL_ICL		30
+
+/* level two multiplexor for pioctl call */
+#define __VIOCSETAL			1	/* set the ACL on a directory */
+#define __VIOCGETAL			2	/* get the ACL on a directory */
+#define __VIOCSETTOK			3	/* set the caller's token for a cell */
+#define __VIOCGETVOLSTAT		4	/* get volume status */
+#define __VIOCSETVOLSTAT		5	/* set volume status */
+#define __VIOCFLUSH			6	/* flush an object from the cache */
+#define __VIOCGETTOK			8	/* get the caller's token for a cell */
+#define __VIOCUNLOG			9	/* discard authentication information */
+#define __VIOCCKSERV			10	/* check status of 1+ file servers */
+#define __VIOCCKBACK			11	/* mark cached volume info as stale */
+#define __VIOCCKCONN			12	/* check caller's tokens/connections */
+#define __VIOCWHEREIS			13	/* find host for a volume */
+#define __VIOCACCESS			20	/* check caller's access on an object */
+#define __VIOCUNPAG			21	/* discard authentication information */
+#define __VIOCGETFID			22	/* get FID for an object */
+#define __VIOCSETCACHESIZE		24	/* set max cache size*/
+#define __VIOCFLUSHCB			25	/* unilaterally drop a callback */
+#define __VIOCNEWCELL			26	/* set cell service information */
+#define __VIOCGETCELL			27	/* get cell configuration entry */
+#define __VIOC_DELETE_MT_PT		28	/* delete a mount point */
+#define __VIOC_STAT_MT_PT		29	/* get contents of a mount point */
+#define __VIOC_FILE_CELL_NAME		30	/* get cell hosting a given object */
+#define __VIOC_GET_WS_CELL		31	/* get caller's home cell name */
+#define __VIOC_AFS_MARINER_HOST		32	/* get/set file transfer monitoring o/p */
+#define __VIOC_GET_PRIMARY_CELL		33	/* get caller's primary cell */
+#define __VIOC_VENUSLOG			34	/* enable/disable CM logging */
+#define __VIOC_GETCELLSTATUS		35	/* get status info for a cell entry */
+#define __VIOC_SETCELLSTATUS		36	/* set status info for a cell entry */
+#define __VIOC_FLUSHVOLUME		37	/* flush cached data from a volume */
+#define __VIOC_AFS_SYSNAME		38	/* get/set the @sys mapping */
+#define __VIOC_EXPORTAFS		39	/* enable/disable NFS/AFS translation */
+#define __VIOCGETCACHEPARMS		40	/* get current cache parameter values */
+
+#define __VIOC(N)	_IOW('V',N,struct afs_ioctl)
+
+#define VIOCSETAL			__VIOC(__VIOCSETAL)
+#define VIOCGETAL			__VIOC(__VIOCGETAL)
+#define VIOCSETTOK			__VIOC(__VIOCSETTOK)
+#define VIOCGETVOLSTAT			__VIOC(__VIOCGETVOLSTAT)
+#define VIOCSETVOLSTAT			__VIOC(__VIOCSETVOLSTAT)
+#define VIOCFLUSH			__VIOC(__VIOCFLUSH)
+#define VIOCGETTOK			__VIOC(__VIOCGETTOK)
+#define VIOCUNLOG			__VIOC(__VIOCUNLOG)
+#define VIOCCKSERV			__VIOC(__VIOCCKSERV)
+#define VIOCCKBACK			__VIOC(__VIOCCKBACK)
+#define VIOCCKCONN			__VIOC(__VIOCCKCONN)
+#define VIOCWHEREIS			__VIOC(__VIOCWHEREIS)
+#define VIOCACCESS			__VIOC(__VIOCACCESS)
+#define VIOCUNPAG			__VIOC(__VIOCUNPAG)
+#define VIOCGETFID			__VIOC(__VIOCGETFID)
+#define VIOCSETCACHESIZE		__VIOC(__VIOCSETCACHESIZE)
+#define VIOCFLUSHCB			__VIOC(__VIOCFLUSHCB)
+#define VIOCNEWCELL			__VIOC(__VIOCNEWCELL)
+#define VIOCGETCELL			__VIOC(__VIOCGETCELL)
+#define VIOC_DELETE_MT_PT		__VIOC(__VIOC_DELETE_MT_PT)
+#define VIOC_STAT_MT_PT			__VIOC(__VIOC_STAT_MT_PT)
+#define VIOC_FILE_CELL_NAME		__VIOC(__VIOC_FILE_CELL_NAME)
+#define VIOC_GET_WS_CELL		__VIOC(__VIOC_GET_WS_CELL)
+#define VIOC_AFS_MARINER_HOST		__VIOC(__VIOC_AFS_MARINER_HOST)
+#define VIOC_GET_PRIMARY_CELL		__VIOC(__VIOC_GET_PRIMARY_CELL)
+#define VIOC_VENUSLOG			__VIOC(__VIOC_VENUSLOG)
+#define VIOC_GETCELLSTATUS		__VIOC(__VIOC_GETCELLSTATUS)
+#define VIOC_SETCELLSTATUS		__VIOC(__VIOC_SETCELLSTATUS)
+#define VIOC_FLUSHVOLUME		__VIOC(__VIOC_FLUSHVOLUME)
+#define VIOC_AFS_SYSNAME		__VIOC(__VIOC_AFS_SYSNAME)
+#define VIOC_EXPORTAFS			__VIOC(__VIOC_EXPORTAFS)
+#define VIOCGETCACHEPARMS		__VIOC(__VIOCGETCACHEPARMS)
+
+/* level two multiplexor for AFS control operations */
+#define	AFSOP_START_RXCALLBACK		0
+#define	AFSOP_START_AFS			1
+#define	AFSOP_START_BKG			2
+#define	AFSOP_START_TRUNCDAEMON		3
+#define AFSOP_START_CS			4
+#define	AFSOP_ADDCELL			5
+#define	AFSOP_CACHEINIT			6
+#define	AFSOP_CACHEINFO			7
+#define	AFSOP_VOLUMEINFO		8
+#define	AFSOP_CACHEFILE			9
+#define	AFSOP_CACHEINODE		10
+#define	AFSOP_AFSLOG			11
+#define	AFSOP_ROOTVOLUME		12
+#define	AFSOP_STARTLOG			14
+#define	AFSOP_ENDLOG			15
+#define AFSOP_AFS_VFSMOUNT		16
+#define AFSOP_ADVISEADDR		17
+#define AFSOP_CLOSEWAIT			18
+#define	AFSOP_RXEVENT_DAEMON		19
+#define	AFSOP_GETMTU			20
+#define	AFSOP_GETIFADDRS		21
+#define	AFSOP_ADDCELL2			29
+#define	AFSOP_AFSDB_HANDLER		30
+#define	AFSOP_SET_DYNROOT		31
+#define	AFSOP_ADDCELLALIAS		32
+#define	AFSOP_SET_FAKESTAT		33
+#define	AFSOP_CELLINFO			34
+#define	AFSOP_SET_THISCELL		35
+#define AFSOP_BASIC_INIT		36
+
+/* level two multiplexor for In Core Logging (ICL) call */
+#define AFS_ICL_OP_COPYOUT		1
+#define	AFS_ICL_OP_ENUMLOGS		2
+#define	AFS_ICL_OP_CLRLOG		3
+#define	AFS_ICL_OP_CLRSET		4
+#define AFS_ICL_OP_CLRALL		5
+#define AFS_ICL_OP_ENUMSETS		6
+#define	AFS_ICL_OP_SETSTAT		7
+#define	AFS_ICL_OP_SETSTATALL		8
+#define AFS_ICL_OP_SETLOGSIZE		9
+#define	AFS_ICL_OP_ENUMLOGSBYSET	10
+#define AFS_ICL_OP_GETSETINFO		11
+#define AFS_ICL_OP_GETLOGINFO		12
+#define AFS_ICL_OP_COPYOUTCLR		13
+#define AFS_ICL_OP_VERSION		14
+
+/*
+ * all AFS pioctls take the same sort of argument
+ */
+struct afs_ioctl
+{
+	char		*in;		/* input buffer */
+	char		*out;		/* output buffer */
+	int16_t		in_size;	/* size of input buffer */
+	int16_t		out_size;	/* maximum size of output buffer */
+};
+
+#define AFS_IOCTL_IN_SIZE_MAX	2048
+#define AFS_IOCTL_OUT_SIZE_MAX	2048
+
+/*
+ * but this is used if the kernel is 64-bit, which is really strange
+ */
+struct afs_ioctl32
+{
+	u_int32_t	in;		/* input buffer address (cast to pointer) */
+	u_int32_t	out;		/* output buffer address (cast to pointer) */
+	int16_t		in_size;	/* size of input buffer */
+	int16_t		out_size;	/* maximum size of output buffer */
+};
+
+
+
+#endif /* _LINUX_AFS_SYSCALL_H */
diff -uNr linux-2.5.69/include/linux/cred.h linux-2.5.69-cred/include/linux/cred.h
--- linux-2.5.69/include/linux/cred.h	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.5.69-cred/include/linux/cred.h	2003-05-13 13:20:06.000000000 +0100
@@ -0,0 +1,79 @@
+#ifndef _LINUX_CRED_H
+#define _LINUX_CRED_H
+
+#ifdef __KERNEL__
+
+#include <linux/param.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <asm/atomic.h>
+
+/*
+ * VFS session authentication token cache
+ *
+ * This is used to store the data required for extra levels of filesystem
+ * security (such as AFS/NFS kerberos keys, Samba workgroup/user/pass, or NTFS
+ * ACLs).
+ *
+ * VFS authentication tokens contain a single blob of data, consisting of three
+ * parts, all next to each other:
+ *   (1) An FS name
+ *   (2) A key
+ *   (3) An arbitrary chunk of data
+ *
+ * Token blobs must not be changed once passed to the core kernel for management
+ */
+struct vfs_pag
+{
+	struct rb_node		node;
+	atomic_t		usage;
+	pag_t			pag;	/* Process Authentication Group ID */
+	struct list_head	tokens;	/* authentication tokens */
+	rwlock_t		lock;
+};
+
+struct vfs_token
+{
+	atomic_t		usage;
+	struct list_head	link;	/* link in pag's list */
+	unsigned short		k_off;	/* offset of key in blob */
+	unsigned short		d_off;	/* offset of data in blob */
+	size_t			size;	/* size of blob */
+	void			*blob;	/* blob containing key + data */
+};
+
+extern int sys_setpag(pag_t);
+extern int sys_getpag(void);
+extern void vfs_unpag(const char *fsname);
+
+extern void vfs_pag_put(struct vfs_pag *);
+
+static inline struct vfs_pag *vfs_pag_get(struct vfs_pag *vfspag)
+{
+	atomic_inc(&vfspag->usage);
+	return vfspag;
+}
+
+static inline int is_vfs_token_valid(struct vfs_token *vtoken)
+{
+	return !list_empty(&vtoken->link);
+}
+
+extern int vfs_pag_add_token(const char *fsname,
+			     unsigned short klen,
+			     const void *key,
+			     size_t dlen,
+			     const void *data,
+			     struct vfs_token **_token);
+
+extern struct vfs_token *vfs_pag_find_token(const char *fsname,
+					    unsigned short klen,
+					    const void *key);
+
+extern void vfs_pag_withdraw_token(struct vfs_token *vtoken);
+
+extern void vfs_token_put(struct vfs_token *vtoken);
+
+#endif /* __KERNEL__ */
+#endif /* _LINUX_CRED_H */
diff -uNr linux-2.5.69/include/linux/fs.h linux-2.5.69-cred/include/linux/fs.h
--- linux-2.5.69/include/linux/fs.h	2003-05-13 11:02:22.000000000 +0100
+++ linux-2.5.69-cred/include/linux/fs.h	2003-05-13 11:02:35.000000000 +0100
@@ -430,6 +430,7 @@
 	mode_t			f_mode;
 	loff_t			f_pos;
 	struct fown_struct	f_owner;
+	struct vfs_token	*f_token;	/* governing credential */
 	unsigned int		f_uid, f_gid;
 	int			f_error;
 	struct file_ra_state	f_ra;
diff -uNr linux-2.5.69/include/linux/sched.h linux-2.5.69-cred/include/linux/sched.h
--- linux-2.5.69/include/linux/sched.h	2003-05-06 15:07:12.000000000 +0100
+++ linux-2.5.69-cred/include/linux/sched.h	2003-05-13 10:29:18.000000000 +0100
@@ -28,6 +28,7 @@
 #include <linux/completion.h>
 #include <linux/pid.h>
 #include <linux/percpu.h>
+#include <linux/cred.h>
 
 struct exec_domain;
 
@@ -387,6 +388,7 @@
 	gid_t gid,egid,sgid,fsgid;
 	int ngroups;
 	gid_t	groups[NGROUPS];
+	struct vfs_pag *vfspag;
 	kernel_cap_t   cap_effective, cap_inheritable, cap_permitted;
 	int keep_capabilities:1;
 	struct user_struct *user;
diff -uNr linux-2.5.69/include/linux/types.h linux-2.5.69-cred/include/linux/types.h
--- linux-2.5.69/include/linux/types.h	2003-05-06 15:04:31.000000000 +0100
+++ linux-2.5.69-cred/include/linux/types.h	2003-05-12 10:19:08.000000000 +0100
@@ -24,6 +24,7 @@
 typedef __kernel_nlink_t	nlink_t;
 typedef __kernel_off_t		off_t;
 typedef __kernel_pid_t		pid_t;
+typedef __kernel_pag_t		pag_t;
 typedef __kernel_daddr_t	daddr_t;
 typedef __kernel_key_t		key_t;
 typedef __kernel_suseconds_t	suseconds_t;
diff -uNr linux-2.5.69/init/main.c linux-2.5.69-cred/init/main.c
--- linux-2.5.69/init/main.c	2003-05-06 15:07:12.000000000 +0100
+++ linux-2.5.69-cred/init/main.c	2003-05-13 14:08:11.000000000 +0100
@@ -80,6 +80,7 @@
 extern void pidhash_init(void);
 extern void pidmap_init(void);
 extern void pte_chain_init(void);
+extern void credentials_init(void);
 extern void radix_tree_init(void);
 extern void free_initmem(void);
 extern void populate_rootfs(void);
@@ -434,6 +435,7 @@
 	pidmap_init();
 	pgtable_cache_init();
 	pte_chain_init();
+	credentials_init();
 	fork_init(num_physpages);
 	proc_caches_init();
 	security_scaffolding_startup();
diff -uNr linux-2.5.69/kernel/cred.c linux-2.5.69-cred/kernel/cred.c
--- linux-2.5.69/kernel/cred.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.5.69-cred/kernel/cred.c	2003-05-13 13:21:06.000000000 +0100
@@ -0,0 +1,367 @@
+/* cred.c: authentication credentials management
+ *
+ * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/cred.h>
+
+static kmem_cache_t *vfs_token_cache;
+static kmem_cache_t *vfs_pag_cache;
+
+static struct rb_root	vfs_pag_tree;
+static spinlock_t	vfs_pag_lock = SPIN_LOCK_UNLOCKED;
+static pag_t		vfs_pag_next = 1;
+
+static void vfs_pag_init_once(void *_vfspag, kmem_cache_t *cachep, unsigned long flags)
+{
+	struct vfs_pag *vfspag = _vfspag;
+
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) {
+		memset(vfspag,0,sizeof(*vfspag));
+		INIT_LIST_HEAD(&vfspag->tokens);
+		rwlock_init(&vfspag->lock);
+	}
+}
+
+static void vfs_token_init_once(void *_vtoken, kmem_cache_t *cachep, unsigned long flags)
+{
+	struct vfs_token *vtoken = _vtoken;
+
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) {
+		memset(vtoken,0,sizeof(*vtoken));
+		INIT_LIST_HEAD(&vtoken->link);
+	}
+}
+
+void __init credentials_init(void)
+{
+	vfs_pag_cache = kmem_cache_create("vfs_pag",
+					      sizeof(struct vfs_pag),
+					      0,
+					      SLAB_HWCACHE_ALIGN,
+					      vfs_pag_init_once, NULL);
+	if (!vfs_pag_cache)
+		panic("Cannot create vfs pag SLAB cache");
+
+	vfs_token_cache = kmem_cache_create("vfs_token",
+					    sizeof(struct vfs_token),
+					    0,
+					    SLAB_HWCACHE_ALIGN,
+					    vfs_token_init_once, NULL);
+	if (!vfs_token_cache)
+		panic("Cannot create vfs token SLAB cache");
+}
+
+/*****************************************************************************/
+/*
+ * join an existing PAG (+ve), run without PAG (0), or create and join new PAG (-1)
+ * - returns ID of PAG joined or 0 if now running without a PAG
+ */
+int sys_setpag(pag_t pag)
+{
+	struct task_struct *tsk = current;
+	struct vfs_pag *vfspag, *xvfspag;
+	struct rb_node **p, *parent;
+
+	if (pag>0) {
+		/* join existing PAG */
+		if (tsk->vfspag->pag &&
+		    tsk->vfspag->pag==pag)
+			return pag;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		spin_lock(&vfs_pag_lock);
+
+		parent = NULL;
+		p = &vfs_pag_tree.rb_node;
+
+		while (*p) {
+			parent = *p;
+			vfspag = rb_entry(parent,struct vfs_pag,node);
+
+			if (pag < vfspag->pag)
+				p = &(*p)->rb_left;
+			else if (pag > vfspag->pag)
+				p = &(*p)->rb_right;
+			else
+				goto pag_found;
+		}
+
+		spin_unlock(&vfs_pag_lock);
+		return -ENOENT;
+
+	pag_found:
+		xvfspag = xchg(&current->vfspag,vfs_pag_get(vfspag));
+		spin_unlock(&vfs_pag_lock);
+
+		if (xvfspag) vfs_pag_put(xvfspag);
+		return pag;
+	}
+	else if (pag==0) {
+		/* run without a PAG */
+		xvfspag = xchg(&current->vfspag,NULL);
+
+		if (xvfspag) vfs_pag_put(xvfspag);
+		return 0;
+	}
+	else if (pag!=-1) {
+		     return -EINVAL;
+	}
+
+	/* start new PAG */
+	vfspag = kmem_cache_alloc(vfs_pag_cache,SLAB_KERNEL);
+	if (!vfspag)
+		return -ENOMEM;
+
+	atomic_set(&vfspag->usage,1);
+
+	/* PAG IDs must be +ve, >0 and unique */
+	spin_lock(&vfs_pag_lock);
+
+	vfspag->pag = vfs_pag_next++;
+	if (vfspag->pag<1)
+		vfspag->pag = 1;
+
+	parent = NULL;
+	p = &vfs_pag_tree.rb_node;
+
+	while (*p) {
+		parent = *p;
+		xvfspag = rb_entry(parent,struct vfs_pag,node);
+
+		if (vfspag->pag < xvfspag->pag)
+			p = &(*p)->rb_left;
+		else if (vfspag->pag > xvfspag->pag)
+			p = &(*p)->rb_right;
+		else
+			goto pag_exists;
+	}
+	goto insert_here;
+
+	/* we found a PAG of the same ID - walk the tree from that point
+	 * looking for the next unused PAG */
+ pag_exists:
+	for (;;) {
+		vfspag->pag = vfs_pag_next++;
+		if (vfspag->pag<1)
+			vfspag->pag = 1;
+
+		if (!parent->rb_parent)
+			p = &vfs_pag_tree.rb_node;
+		else if (parent->rb_parent->rb_left==parent)
+			p = &parent->rb_parent->rb_left;
+		else
+			p = &parent->rb_parent->rb_right;
+
+		parent = rb_next(parent);
+		if (!parent)
+			break;
+
+		xvfspag = rb_entry(parent,struct vfs_pag,node);
+		if (vfspag->pag < xvfspag->pag)
+			goto insert_here;
+	}
+
+ insert_here:
+	rb_link_node(&vfspag->node,parent,p);
+	rb_insert_color(&vfspag->node,&vfs_pag_tree);
+	spin_unlock(&vfs_pag_lock);
+
+	xvfspag = xchg(&current->vfspag,vfspag);
+	if (xvfspag) vfs_pag_put(xvfspag);
+
+	return vfspag->pag;
+} /* end sys_setpag() */
+
+/*****************************************************************************/
+/*
+ * get the PAG of the current process, or 0 if it doesn't have one
+ */
+int sys_getpag(void)
+{
+	struct vfs_pag *vfspag = current->vfspag;
+
+	return vfspag ? vfspag->pag : 0;
+
+} /* end sys_getpag() */
+
+/*****************************************************************************/
+/*
+ * dispose of a VFS pag
+ */
+void vfs_pag_put(struct vfs_pag *vfspag)
+{
+	struct vfs_token *vtoken;
+
+	if (atomic_dec_and_lock(&vfspag->usage,&vfs_pag_lock)) {
+		rb_erase(&vfspag->node,&vfs_pag_tree);
+		spin_unlock(&vfs_pag_lock);
+
+		while (!list_empty(&vfspag->tokens)) {
+			vtoken = list_entry(vfspag->tokens.next,struct vfs_token,link);
+			list_del_init(&vtoken->link);
+			vfs_token_put(vtoken);
+		}
+
+		kmem_cache_free(vfs_pag_cache,vfspag);
+	}
+
+} /* end vfs_pag_put() */
+
+/*****************************************************************************/
+/*
+ * dispose of a VFS token
+ */
+void vfs_token_put(struct vfs_token *vtoken)
+{
+	if (atomic_dec_and_test(&vtoken->usage)) {
+		kfree(vtoken->blob);
+		kmem_cache_free(vfs_pag_cache,vtoken);
+	}
+
+} /* end vfs_token_put() */
+
+/*****************************************************************************/
+/*
+ * add an authentication token to a pag list
+ */
+int vfs_pag_add_token(const char *fsname,
+		      unsigned short klen,
+		      const void *key,
+		      size_t dlen,
+		      const void *data,
+		      struct vfs_token **_vtoken)
+{
+	struct vfs_token *vtoken;
+	struct vfs_pag *vfspag = current->vfspag;
+
+	*_vtoken = NULL;
+
+	if (!vfspag)
+		return -EACCES;
+
+	vtoken = kmem_cache_alloc(vfs_token_cache,SLAB_KERNEL);
+	if (!vtoken)
+		return -ENOMEM;
+
+	vtoken->k_off	= strlen(fsname) + 1;
+	vtoken->d_off	= vtoken->k_off + klen;
+	vtoken->size	= vtoken->d_off + dlen;
+
+	vtoken->blob = kmalloc(vtoken->size,SLAB_KERNEL);
+	if (!vtoken->blob) {
+		kfree(vtoken);
+		return -ENOMEM;
+	}
+
+	atomic_set(&vtoken->usage,1);
+
+	memcpy(vtoken->blob,fsname,vtoken->k_off);
+	memcpy(vtoken->blob+vtoken->k_off,key,klen);
+	memcpy(vtoken->blob+vtoken->d_off,key,dlen);
+
+	write_lock(&vfspag->lock);
+	list_add_tail(&vtoken->link,&vfspag->tokens);
+	write_unlock(&vfspag->lock);
+
+	*_vtoken = vtoken;
+	return 0;
+} /* end vfs_pag_add_token() */
+
+EXPORT_SYMBOL(vfs_pag_add_token);
+
+/*****************************************************************************/
+/*
+ * search for a token covering a particular filesystem key in the specified pag list
+ */
+struct vfs_token *vfs_pag_find_token(const char *fsname,
+				     unsigned short klen,
+				     const void *key)
+{
+	struct vfs_token *vtoken;
+	struct vfs_pag *vfspag = current->vfspag;
+
+	if (!vfspag) return NULL;
+
+	read_lock(&vfspag->lock);
+
+	list_for_each_entry(vtoken,&vfspag->tokens,link) {
+		if (vtoken->d_off-vtoken->k_off == klen &&
+		    strcmp(vtoken->blob,fsname)==0 &&
+		    memcmp(vtoken->blob+vtoken->k_off,key,klen)==0)
+			goto found;		    
+	}
+
+	read_unlock(&vfspag->lock);
+	return NULL;
+
+ found:
+	read_unlock(&vfspag->lock);
+	return vtoken;
+} /* end vfs_pag_find_token() */
+
+EXPORT_SYMBOL(vfs_pag_find_token);
+
+/*****************************************************************************/
+/*
+ * withdraw a token from a pag list
+ */
+void vfs_pag_withdraw_token(struct vfs_token *vtoken)
+{
+	struct vfs_pag *vfspag = current->vfspag;
+
+	if (!vfspag)
+		return;
+
+	write_lock(&vfspag->lock);
+	list_del_init(&vtoken->link);
+	write_unlock(&vfspag->lock);
+
+	vfs_token_put(vtoken);
+
+} /* end vfs_pag_withdraw_token() */
+
+EXPORT_SYMBOL(vfs_pag_withdraw_token);
+
+/*****************************************************************************/
+/*
+ * withdraw all tokens for the named filesystem from the current PAG
+ */
+void vfs_unpag(const char *fsname)
+{
+	struct list_head *_n, *_p;
+	struct vfs_token *vtoken;
+	struct vfs_pag *vfspag = current->vfspag;
+
+	if (!vfspag)
+		return;
+
+	write_lock(&vfspag->lock);
+
+	list_for_each_safe(_p,_n,&vfspag->tokens) {
+		vtoken = list_entry(_p,struct vfs_token,link);
+
+		if (strcmp(fsname,vtoken->blob)==0) {
+			list_del_init(&vtoken->link);
+			vfs_token_put(vtoken);
+		}
+	}
+
+	write_unlock(&vfspag->lock);
+
+} /* end vfs_unpag() */
diff -uNr linux-2.5.69/kernel/fork.c linux-2.5.69-cred/kernel/fork.c
--- linux-2.5.69/kernel/fork.c	2003-05-06 15:07:12.000000000 +0100
+++ linux-2.5.69-cred/kernel/fork.c	2003-05-13 10:39:37.000000000 +0100
@@ -884,6 +884,9 @@
 
 	if (clone_flags & CLONE_CHILD_SETTID)
 		p->set_child_tid = child_tidptr;
+
+	if (p->vfspag) vfs_pag_get(p->vfspag);
+
 	/*
 	 * Clear TID on mm_release()?
 	 */
diff -uNr linux-2.5.69/kernel/Makefile linux-2.5.69-cred/kernel/Makefile
--- linux-2.5.69/kernel/Makefile	2003-05-06 15:04:56.000000000 +0100
+++ linux-2.5.69-cred/kernel/Makefile	2003-05-13 10:45:27.000000000 +0100
@@ -3,7 +3,7 @@
 #
 
 obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
-	    exit.o itimer.o time.o softirq.o resource.o \
+	    cred.o exit.o itimer.o time.o softirq.o resource.o \
 	    sysctl.o capability.o ptrace.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o futex.o pid.o \
 	    rcupdate.o intermodule.o extable.o params.o posix-timers.o


^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2003-05-17 20:58 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-05-13 15:34 [PATCH] in-core AFS multiplexor and PAG support David Howells
2003-05-13 15:39 David Howells
2003-05-13 15:52 ` Linus Torvalds
2003-05-13 15:44   ` Alan Cox
2003-05-13 16:52     ` Jan Harkes
2003-05-13 16:57     ` Linus Torvalds
2003-05-13 16:39       ` Alan Cox
2003-05-13 16:05   ` David Howells
2003-05-13 16:47     ` Linus Torvalds
2003-05-13 17:20       ` Jan Harkes
2003-05-13 18:21         ` David Howells
2003-05-17 12:30         ` Pavel Machek
2003-05-13 17:23       ` Trond Myklebust
2003-05-15 11:41         ` Ingo Oeser
2003-05-13 17:42       ` David Howells
2003-05-13 16:03 ` Christoph Hellwig
2003-05-13 16:12   ` David Howells
2003-05-13 20:23     ` Christoph Hellwig
2003-05-13 16:39   ` Jeff Garzik
2003-05-13 16:57     ` David Howells
     [not found] <20030513182950.GB30766@delft.aura.cs.cmu.edu>
2003-05-13 18:53 ` David Howells

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).