linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] CKRM: 3/10 CKRM:  Core ckrm, rcfs
@ 2004-11-29 18:47 Gerrit Huizenga
  2004-11-29 22:00 ` Greg KH
  0 siblings, 1 reply; 13+ messages in thread
From: Gerrit Huizenga @ 2004-11-29 18:47 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, Rik van Riel, Chris Mason, ckrm-tech

Main code for CKRM default classification engine.  Adds Resrouce
Control (rc) filesystem as mechanism for setting policies for
class assignments in CKRM.

Signed-Off-By: Chandra Seetharaman <sekharan@us.ibm.com>
Signed-Off-By: Hubertus Franke <frankeh@us.ibm.com>
Signed-Off-By: Shailabh Nagar <nagar@us.ibm.com>
Signed-Off-By: Gerrit Huizenga <gh@us.ibm.com>
Signed-Off-By: Vivek Kashyap <vivk@us.ibm.com>


 include/linux/ckrm_ce.h     |  108 +++++
 include/linux/ckrm_events.h |    8 
 include/linux/ckrm_rc.h     |  355 ++++++++++++++++
 include/linux/rcfs.h        |   96 ++++
 include/linux/sched.h       |    6 
 init/main.c                 |    2 
 kernel/ckrm/Makefile        |    2 
 kernel/ckrm/ckrm.c          |  927 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/ckrm/ckrmutils.c     |  195 +++++++++
 9 files changed, 1694 insertions(+), 5 deletions(-)

Index: linux-2.6.10-rc2/include/linux/ckrm_ce.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.10-rc2/include/linux/ckrm_ce.h	2004-11-19 20:43:43.484218139 -0800
@@ -0,0 +1,108 @@
+/*
+ *  ckrm_ce.h - Header file to be used by Classification Engine of CKRM
+ *
+ * Copyright (C) Hubertus Franke, IBM Corp. 2003
+ *           (C) Shailabh Nagar,  IBM Corp. 2003
+ *           (C) Chandra Seetharaman, IBM Corp. 2003
+ * 
+ * Provides data structures, macros and kernel API of CKRM for 
+ * classification engine.
+ *
+ * Latest version, more details at http://ckrm.sf.net
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+/* Changes
+ *
+ * 12 Nov 2003
+ *        Created.
+ * 22 Apr 2004
+ *        Adopted to classtypes
+ */
+
+#ifndef _LINUX_CKRM_CE_H
+#define _LINUX_CKRM_CE_H
+
+#ifdef CONFIG_CKRM
+
+#include <linux/ckrm_events.h>
+
+/*
+ * Action parameters identifying the cause of a task<->class notify callback 
+ * these can perculate up to user daemon consuming records send by the 
+ * classification engine
+ */
+
+#ifdef __KERNEL__
+
+typedef void *(*ce_classify_fct_t) (enum ckrm_event event, void *obj, ...);
+typedef void (*ce_notify_fct_t) (enum ckrm_event event, void *classobj,
+				 void *obj);
+
+typedef struct ckrm_eng_callback {
+	/* general state information */
+	int always_callback;	/* set if CE should always be called back 
+				   regardless of numclasses */
+
+	/* callbacks which are called without holding locks */
+
+	unsigned long c_interest;	/* set of classification events of 
+					 * interest to CE 
+					 */
+
+	/* generic classify */
+	ce_classify_fct_t classify;
+
+	/* class added */
+	void (*class_add) (const char *name, void *core, int classtype);
+
+	/* class deleted */
+	void (*class_delete) (const char *name, void *core, int classtype);
+
+	/* callbacks which are called while holding task_lock(tsk) */
+	unsigned long n_interest;	/* set of notification events of 
+					 *  interest to CE 
+					 */
+	/* notify on class switch */
+	ce_notify_fct_t notify;	
+} ckrm_eng_callback_t;
+
+struct inode;
+struct dentry;
+
+typedef struct rbce_eng_callback {
+	int (*mkdir) (struct inode *, struct dentry *, int);	/* mkdir */
+	int (*rmdir) (struct inode *, struct dentry *);		/* rmdir */
+	int (*mnt) (void);
+	int (*umnt) (void);
+} rbce_eng_callback_t;
+
+extern int ckrm_register_engine(const char *name, ckrm_eng_callback_t *);
+extern int ckrm_unregister_engine(const char *name);
+
+extern void *ckrm_classobj(char *, int *classtype);
+extern int get_exe_path_name(struct task_struct *t, char *filename,
+			     int max_size);
+
+extern int rcfs_register_engine(rbce_eng_callback_t *);
+extern int rcfs_unregister_engine(rbce_eng_callback_t *);
+
+extern int ckrm_reclassify(int pid);
+
+#ifndef _LINUX_CKRM_RC_H
+
+extern void ckrm_core_grab(void *);
+extern void ckrm_core_drop(void *);
+#endif
+
+#endif /* CONFIG_CKRM */
+#endif /* __KERNEL__ */
+#endif /* _LINUX_CKRM_CE_H */
Index: linux-2.6.10-rc2/include/linux/ckrm_events.h
===================================================================
--- linux-2.6.10-rc2.orig/include/linux/ckrm_events.h	2004-11-19 20:40:52.517303823 -0800
+++ linux-2.6.10-rc2/include/linux/ckrm_events.h	2004-11-19 20:43:43.487217664 -0800
@@ -163,8 +163,6 @@
 struct user_struct;
 
 CKRM_DEF_CB_ARG(FORK, fork, struct task_struct *);
-CKRM_DEF_CB_ARG(NEWTASK, newtask, struct task_struct *);
-CKRM_DEF_CB_ARG(EXIT, exit, struct task_struct *);
 CKRM_DEF_CB_ARG(EXEC, exec, const char *);
 CKRM_DEF_CB(UID, uid);
 CKRM_DEF_CB(GID, gid);
@@ -177,9 +175,11 @@
 
 /* some other functions required */
 #ifdef CONFIG_CKRM
-void ckrm_cb_newtask(struct task_struct *);
-void ckrm_cb_exit(struct task_struct *);
+extern void ckrm_init(void);
+extern void ckrm_cb_newtask(struct task_struct *);
+extern void ckrm_cb_exit(struct task_struct *);
 #else
+#define ckrm_init()		do { } while (0)
 #define ckrm_cb_newtask(x)	do { } while (0)
 #define ckrm_cb_exit(x)		do { } while (0)
 #endif
Index: linux-2.6.10-rc2/include/linux/ckrm_rc.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.10-rc2/include/linux/ckrm_rc.h	2004-11-19 20:43:43.493216714 -0800
@@ -0,0 +1,355 @@
+/*
+ *  ckrm_rc.h - Header file to be used by Resource controllers of CKRM
+ *
+ * Copyright (C) Hubertus Franke, IBM Corp. 2003
+ *           (C) Shailabh Nagar,  IBM Corp. 2003
+ *           (C) Chandra Seetharaman, IBM Corp. 2003
+ *	     (C) Vivek Kashyap , IBM Corp. 2004
+ * 
+ * Provides data structures, macros and kernel API of CKRM for 
+ * resource controllers.
+ *
+ * More details at http://ckrm.sf.net
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+/*
+ * Changes
+ *
+ * 12 Nov 2003
+ *        Created.
+ */
+
+#ifndef _LINUX_CKRM_RC_H
+#define _LINUX_CKRM_RC_H
+
+#ifdef __KERNEL__
+
+#ifdef CONFIG_CKRM
+
+#include <linux/list.h>
+#include <linux/ckrm_events.h>
+#include <linux/ckrm_ce.h>
+#include <linux/seq_file.h>
+
+#define CKRM_MAX_CLASSTYPES         32	/* maximum number of class types */
+#define CKRM_MAX_CLASSTYPE_NAME     32 	/* maximum classtype name length */
+
+#define CKRM_MAX_RES_CTLRS           8	/* maximum resource controllers per classtype */
+#define CKRM_MAX_RES_NAME          128	/* maximum resource controller name length */
+
+struct ckrm_core_class;
+struct ckrm_classtype;
+
+/*
+ * Share specifications
+ */
+
+typedef struct ckrm_shares {
+	int my_guarantee;
+	int my_limit;
+	int total_guarantee;
+	int max_limit;
+	int unused_guarantee;	/* not used as parameters */
+	int cur_max_limit;	/* not used as parameters */
+} ckrm_shares_t;
+
+#define CKRM_SHARE_UNCHANGED     (-1)	
+#define CKRM_SHARE_DONTCARE      (-2)	
+#define CKRM_SHARE_DFLT_TOTAL_GUARANTEE (100) 
+#define CKRM_SHARE_DFLT_MAX_LIMIT     (100)  
+
+/*
+ * RESOURCE CONTROLLERS
+ */
+
+/* resource controller callback structure */
+
+typedef struct ckrm_res_ctlr {
+	char res_name[CKRM_MAX_RES_NAME];
+	int res_hdepth;		/* maximum hierarchy */
+	int resid;		/* (for now) same as the enum resid */
+	struct ckrm_classtype *classtype;    /* classtype owning this res ctlr */
+
+	/* allocate/free new resource class object for resource controller */
+	void *(*res_alloc) (struct ckrm_core_class * this,
+			    struct ckrm_core_class * parent);
+	void (*res_free) (void *);
+
+	/* set/get limits/guarantees for a resource controller class */
+	int (*set_share_values) (void *, struct ckrm_shares * shares);
+	int (*get_share_values) (void *, struct ckrm_shares * shares);
+
+	/* statistics and configuration access */
+	int (*get_stats) (void *, struct seq_file *);
+	int (*reset_stats) (void *);
+	int (*show_config) (void *, struct seq_file *);
+	int (*set_config) (void *, const char *cfgstr);
+
+	void (*change_resclass) (void *, void *, void *);
+} ckrm_res_ctlr_t;
+
+/*
+ * CKRM_CLASSTYPE
+ *
+ * A <struct ckrm_classtype> object describes a dimension for CKRM to classify 
+ * along. Need to provide methods to create and manipulate class objects in
+ * this dimension
+ */
+
+/* list of predefined class types, we always recognize */
+#define CKRM_CLASSTYPE_TASK_CLASS    0
+#define CKRM_CLASSTYPE_SOCKET_CLASS  1
+#define CKRM_RESV_CLASSTYPES         2	/* always +1 of last known type */
+
+#define CKRM_MAX_TYPENAME_LEN       32
+
+typedef struct ckrm_classtype {
+	/* TODO: Review for cache alignment */
+
+	/* resource controllers */
+
+	spinlock_t res_ctlrs_lock;  /* protect res ctlr related data */
+	int max_res_ctlrs;          /* max number of res ctlrs allowed */
+	int max_resid;              /* max resid used */
+	int resid_reserved;	    /* max number of reserved controllers */
+	long bit_res_ctlrs;	    /* bitmap of resource ID used */
+	atomic_t nr_resusers[CKRM_MAX_RES_CTLRS];
+	ckrm_res_ctlr_t *res_ctlrs[CKRM_MAX_RES_CTLRS];
+
+	/* state about my classes */
+
+	struct ckrm_core_class *default_class;	
+	struct list_head classes;  /* link all classes of this classtype */
+	int num_classes;	 
+
+	/* state about my ce interaction */
+	atomic_t ce_regd;		/* if CE registered */
+	int ce_cb_active;		/* if Callbacks active */
+	atomic_t ce_nr_users;		/* number of active transient calls */
+	struct ckrm_eng_callback ce_callbacks;	/* callback engine */
+
+	/* Begin classtype-rcfs private data. No rcfs/fs specific types used.  */
+
+	int mfidx;		/* Index into genmfdesc array used to initialize */
+	void *mfdesc;		/* Array of descriptors of root and magic files */
+	int mfcount;		/* length of above array */
+	void *rootde;		/* root dentry created by rcfs */
+	/* End rcfs private data */
+
+	char name[CKRM_MAX_TYPENAME_LEN]; /* currently same as mfdesc[0]->name  */
+	                                  /* but could be different */
+	int typeID;			  /* unique TypeID */
+	int maxdepth;			  /* maximum depth supported */
+
+	/* functions to be called on any class type by external API's */
+
+	struct ckrm_core_class *(*alloc) (struct ckrm_core_class * parent, 
+					  const char *name);	
+	int (*free) (struct ckrm_core_class * cls);	
+	int (*show_members) (struct ckrm_core_class *, struct seq_file *);
+	int (*show_stats) (struct ckrm_core_class *, struct seq_file *);
+	int (*show_config) (struct ckrm_core_class *, struct seq_file *);
+	int (*show_shares) (struct ckrm_core_class *, struct seq_file *);
+
+	int (*reset_stats) (struct ckrm_core_class *, const char *resname,
+			    const char *);
+	int (*set_config) (struct ckrm_core_class *, const char *resname,
+			   const char *cfgstr);
+	int (*set_shares) (struct ckrm_core_class *, const char *resname,
+			   struct ckrm_shares * shares);
+	int (*forced_reclassify) (struct ckrm_core_class *, const char *);
+
+	/* functions to be called on a class type by ckrm internals */
+
+	/* class initialization for new RC */
+	void (*add_resctrl) (struct ckrm_core_class *, int resid);	
+} ckrm_classtype_t;
+
+/*
+ * CKRM CORE CLASS
+ *      common part to any class structure (i.e. instance of a classtype)
+ */
+
+/*
+ * basic definition of a hierarchy that is to be used by the the CORE classes
+ * and can be used by the resource class objects
+ */
+
+#define CKRM_CORE_MAGIC		0xBADCAFFE
+
+typedef struct ckrm_hnode {
+	struct ckrm_core_class *parent;
+	struct list_head siblings;	
+	struct list_head children;	
+} ckrm_hnode_t;
+
+typedef struct ckrm_core_class {
+	struct ckrm_classtype *classtype;	
+	void *res_class[CKRM_MAX_RES_CTLRS];	/* resource classes */
+	spinlock_t class_lock;	                /* protects list,array above */
+
+	struct list_head objlist;		/* generic object list */
+	struct list_head clslist;		/* peer classtype classes */
+	struct dentry *dentry;			/* dentry of inode in the RCFS */
+	int magic;
+
+	struct ckrm_hnode hnode;		/* hierarchy */
+	rwlock_t hnode_rwlock;			/* protects hnode above. */
+	atomic_t refcnt;
+	const char *name;
+	int delayed;				/* core deletion delayed  */
+						/* because of race conditions */
+} ckrm_core_class_t;
+
+/* type coerce between derived class types and ckrm core class type */
+#define class_type(type,coreptr)   container_of(coreptr,type,core)
+#define class_core(clsptr)         (&(clsptr)->core)
+/* locking classes */
+#define class_lock(coreptr)        spin_lock(&(coreptr)->class_lock)
+#define class_unlock(coreptr)      spin_unlock(&(coreptr)->class_lock)
+/* what type is a class of ISA */
+#define class_isa(clsptr)          (class_core(clsptr)->classtype)
+
+/*
+ * OTHER
+ */
+
+#define ckrm_get_res_class(rescls, resid, type) \
+	((type*) (((resid != -1) && ((rescls) != NULL) \
+			   && ((rescls) != (void *)-1)) ? \
+	 ((struct ckrm_core_class *)(rescls))->res_class[resid] : NULL))
+
+
+extern int ckrm_register_res_ctlr(struct ckrm_classtype *, ckrm_res_ctlr_t *);
+extern int ckrm_unregister_res_ctlr(ckrm_res_ctlr_t *);
+
+extern int ckrm_validate_and_grab_core(struct ckrm_core_class *core);
+extern int ckrm_init_core_class(struct ckrm_classtype *clstype,
+				struct ckrm_core_class *dcore,
+				struct ckrm_core_class *parent,
+				const char *name);
+extern int ckrm_release_core_class(struct ckrm_core_class *);	
+
+/* TODO: can disappear after cls del debugging */
+
+extern struct ckrm_res_ctlr *ckrm_resctlr_lookup(struct ckrm_classtype *type,
+						 const char *resname);
+
+extern void ckrm_lock_hier(struct ckrm_core_class *);
+extern void ckrm_unlock_hier(struct ckrm_core_class *);
+extern struct ckrm_core_class *ckrm_get_next_child(struct ckrm_core_class *,
+						   struct ckrm_core_class *);
+
+extern void child_guarantee_changed(struct ckrm_shares *, int, int);
+extern void child_maxlimit_changed(struct ckrm_shares *, int);
+extern int set_shares(struct ckrm_shares *, struct ckrm_shares *,
+		      struct ckrm_shares *);
+
+/* classtype registration and lookup */
+extern int ckrm_register_classtype(struct ckrm_classtype *clstype);
+extern int ckrm_unregister_classtype(struct ckrm_classtype *clstype);
+extern struct ckrm_classtype *ckrm_find_classtype_by_name(const char *name);
+
+/* default functions that can be used in classtypes's function table */
+extern int ckrm_class_show_shares(struct ckrm_core_class *core,
+				  struct seq_file *seq);
+extern int ckrm_class_show_stats(struct ckrm_core_class *core,
+				 struct seq_file *seq);
+extern int ckrm_class_show_config(struct ckrm_core_class *core,
+				  struct seq_file *seq);
+extern int ckrm_class_set_config(struct ckrm_core_class *core,
+				 const char *resname, const char *cfgstr);
+extern int ckrm_class_set_shares(struct ckrm_core_class *core,
+				 const char *resname,
+				 struct ckrm_shares *shares);
+extern int ckrm_class_reset_stats(struct ckrm_core_class *core,
+				  const char *resname, const char *unused);
+
+static inline void ckrm_core_grab(struct ckrm_core_class *core)
+{
+	if (core)
+		atomic_inc(&core->refcnt);
+}
+
+static inline void ckrm_core_drop(struct ckrm_core_class *core)
+{
+	/* only make definition available in this context */
+	extern void ckrm_free_core_class(struct ckrm_core_class *core);
+	if (core && (atomic_dec_and_test(&core->refcnt)))
+		ckrm_free_core_class(core);
+}
+
+static inline unsigned int ckrm_is_core_valid(ckrm_core_class_t * core)
+{
+	return (core && (core->magic == CKRM_CORE_MAGIC));
+}
+
+/*
+ * iterate through all associate resource controllers:
+ * requires following arguments (ckrm_core_class *cls, 
+ *                               ckrm_res_ctrl   *ctlr,
+ *                               void            *robj,
+ *                               int              bmap)
+ */
+
+#define forall_class_resobjs(cls,rcbs,robj,bmap)			\
+       for ( bmap=((cls->classtype)->bit_res_ctlrs) ;			\
+	     ({ int rid; ((rid=ffs(bmap)-1) >= 0) &&			\
+	                 (bmap &= ~(1<<rid),				\
+				((rcbs=cls->classtype->res_ctlrs[rid])	\
+				 && (robj=cls->res_class[rid]))); });	\
+           )
+
+extern struct ckrm_classtype *ckrm_classtypes[];	
+
+/*
+ * CE Invocation interface
+ */
+
+#define ce_protect(ctype)      (atomic_inc(&((ctype)->ce_nr_users)))
+#define ce_release(ctype)      (atomic_dec(&((ctype)->ce_nr_users)))
+
+/* CE Classification callbacks with */
+
+#define CE_CLASSIFY_NORET(ctype, event, objs_to_classify...)		\
+do {									\
+	if ((ctype)->ce_cb_active					\
+	    && (test_bit(event,&(ctype)->ce_callbacks.c_interest)))	\
+		(*(ctype)->ce_callbacks.classify)(event,		\
+						  objs_to_classify);	\
+} while (0)
+
+#define CE_CLASSIFY_RET(ret, ctype, event, objs_to_classify...)		\
+do {									\
+	if ((ctype)->ce_cb_active					\
+	    && (test_bit(event,&(ctype)->ce_callbacks.c_interest)))	\
+		ret = (*(ctype)->ce_callbacks.classify)(event,		\
+							objs_to_classify);\
+} while (0)
+
+#define CE_NOTIFY(ctype, event, cls, objs_to_classify)			\
+do {									\
+	if ((ctype)->ce_cb_active					\
+	    && (test_bit(event,&(ctype)->ce_callbacks.n_interest)))	\
+		(*(ctype)->ce_callbacks.notify)(event,			\
+						cls,objs_to_classify);	\
+} while (0)
+
+/*
+ * RCFS related 
+ */
+
+/* vars needed by other modules/core */
+
+extern int rcfs_mounted;
+extern int rcfs_engine_regd;
+
+#endif /* CONFIG_CKRM */
+#endif /* __KERNEL__ */
+#endif /* _LINUX_CKRM_RC_H */
Index: linux-2.6.10-rc2/include/linux/rcfs.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.10-rc2/include/linux/rcfs.h	2004-11-19 20:43:43.497216080 -0800
@@ -0,0 +1,96 @@
+#ifndef _LINUX_RCFS_H
+#define _LINUX_RCFS_H
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/ckrm_events.h>
+#include <linux/ckrm_rc.h>
+#include <linux/ckrm_ce.h>
+
+/*
+ * The following declarations cannot be included in any of ckrm*.h files 
+ * without jumping hoops. Remove later when rearrangements done
+ */
+
+#define RCFS_MAGIC	0x4feedbac
+#define RCFS_MAGF_NAMELEN 20
+extern int RCFS_IS_MAGIC;
+
+#define rcfs_is_magic(dentry)  ((dentry)->d_fsdata == &RCFS_IS_MAGIC)
+
+typedef struct rcfs_inode_info {
+	ckrm_core_class_t *core;
+	char *name;
+	struct inode vfs_inode;
+} rcfs_inode_info_t;
+
+#define RCFS_DEFAULT_DIR_MODE	(S_IFDIR | S_IRUGO | S_IXUGO)
+#define RCFS_DEFAULT_FILE_MODE	(S_IFREG | S_IRUSR | S_IWUSR | S_IRGRP |S_IROTH)
+
+struct rcfs_magf {
+	char name[RCFS_MAGF_NAMELEN];
+	int mode;
+	struct inode_operations *i_op;
+	struct file_operations *i_fop;
+};
+
+struct rcfs_mfdesc {
+	struct rcfs_magf *rootmf;	/* Root directory and its magic files */
+	int rootmflen;			/* length of above array */
+	/*
+	 * Can have a different magf describing magic files 
+	 * for non-root entries too.
+	 */
+};
+
+extern struct rcfs_mfdesc *genmfdesc[];
+
+inline struct rcfs_inode_info *RCFS_I(struct inode *inode);
+
+int rcfs_empty(struct dentry *);
+struct inode *rcfs_get_inode(struct super_block *, int, dev_t);
+int rcfs_mknod(struct inode *, struct dentry *, int, dev_t);
+int _rcfs_mknod(struct inode *, struct dentry *, int, dev_t);
+int rcfs_mkdir(struct inode *, struct dentry *, int);
+ckrm_core_class_t *rcfs_make_core(struct dentry *, struct ckrm_core_class *);
+struct dentry *rcfs_set_magf_byname(char *, void *);
+
+struct dentry *rcfs_create_internal(struct dentry *, struct rcfs_magf *, int);
+int rcfs_delete_internal(struct dentry *);
+int rcfs_create_magic(struct dentry *, struct rcfs_magf *, int);
+int rcfs_clear_magic(struct dentry *);
+
+extern struct super_operations rcfs_super_ops;
+extern struct address_space_operations rcfs_aops;
+
+extern struct inode_operations rcfs_dir_inode_operations;
+extern struct inode_operations rcfs_rootdir_inode_operations;
+extern struct inode_operations rcfs_file_inode_operations;
+
+extern struct file_operations target_fileops;
+extern struct file_operations shares_fileops;
+extern struct file_operations stats_fileops;
+extern struct file_operations config_fileops;
+extern struct file_operations members_fileops;
+extern struct file_operations reclassify_fileops;
+extern struct file_operations rcfs_file_operations;
+
+/* Callbacks into rcfs from ckrm */
+
+typedef struct rcfs_functions {
+	int (*mkroot) (struct rcfs_magf *, int, struct dentry **);
+	int (*rmroot) (struct dentry *);
+	int (*register_classtype) (ckrm_classtype_t *);
+	int (*deregister_classtype) (ckrm_classtype_t *);
+} rcfs_fn_t;
+
+int rcfs_register_classtype(ckrm_classtype_t *);
+int rcfs_deregister_classtype(ckrm_classtype_t *);
+int rcfs_mkroot(struct rcfs_magf *, int, struct dentry **);
+int rcfs_rmroot(struct dentry *);
+
+#define RCFS_ROOT "/rcfs"  	/* TODO:  Should use the mount point */
+extern struct dentry *rcfs_rootde;
+extern rbce_eng_callback_t rcfs_eng_callbacks;
+
+#endif	/* _LINUX_RCFS_H */
Index: linux-2.6.10-rc2/include/linux/sched.h
===================================================================
--- linux-2.6.10-rc2.orig/include/linux/sched.h	2004-11-19 20:41:45.564899680 -0800
+++ linux-2.6.10-rc2/include/linux/sched.h	2004-11-19 20:43:43.503215130 -0800
@@ -30,6 +30,7 @@
 #include <linux/pid.h>
 #include <linux/percpu.h>
 #include <linux/topology.h>
+#include <linux/taskdelays.h>
 
 struct exec_domain;
 
@@ -667,6 +668,11 @@
 #ifdef CONFIG_DELAY_ACCT
 	struct task_delay_info delays;
 #endif
+#ifdef CONFIG_CKRM
+	spinlock_t  ckrm_tsklock;
+	void       *ce_data;
+#endif
+
 };
 
 static inline pid_t process_group(struct task_struct *tsk)
Index: linux-2.6.10-rc2/init/main.c
===================================================================
--- linux-2.6.10-rc2.orig/init/main.c	2004-11-14 17:27:03.000000000 -0800
+++ linux-2.6.10-rc2/init/main.c	2004-11-19 20:44:11.316808720 -0800
@@ -46,6 +46,7 @@
 #include <linux/rmap.h>
 #include <linux/mempolicy.h>
 #include <linux/key.h>
+#include <linux/ckrm_events.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -523,6 +524,7 @@
 	rcu_init();
 	init_IRQ();
 	pidhash_init();
+	ckrm_init();
 	init_timers();
 	softirq_init();
 	time_init();
Index: linux-2.6.10-rc2/kernel/ckrm/ckrm.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.10-rc2/kernel/ckrm/ckrm.c	2004-11-19 20:43:43.520212437 -0800
@@ -0,0 +1,927 @@
+/* ckrm.c - Class-based Kernel Resource Management (CKRM)
+ *
+ * Copyright (C) Hubertus Franke, IBM Corp. 2003, 2004
+ *           (C) Shailabh Nagar,  IBM Corp. 2003, 2004
+ *           (C) Chandra Seetharaman,  IBM Corp. 2003
+ *	     (C) Vivek Kashyap,	IBM Corp. 2004
+ * 
+ * 
+ * Provides kernel API of CKRM for in-kernel,per-resource controllers 
+ * (one each for cpu, memory, io, network) and callbacks for 
+ * classification modules.
+ *
+ * Latest version, more details at http://ckrm.sf.net
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+/*
+ * Changes
+ *
+ * 28 Aug 2003
+ *        Created.
+ * 06 Nov 2003
+ *        Made modifications to suit the new RBCE module.
+ * 10 Nov 2003
+ *        Fixed a bug in fork and exit callbacks. Added callbacks_active and
+ *        surrounding logic. Added task paramter for all CE callbacks.
+ * 23 Mar 2004
+ *        moved to referenced counted class objects and correct locking
+ * 19 Apr 2004
+ *        Integrated ckrm hooks, classtypes, ...
+ *  
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include <linux/mm.h>
+#include <asm/errno.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/ckrm_rc.h>
+#include <linux/rcfs.h>
+#include <net/sock.h>
+#include <linux/ip.h>
+
+rwlock_t ckrm_class_lock = RW_LOCK_UNLOCKED;	/* protects classlists */
+
+struct rcfs_functions rcfs_fn;
+EXPORT_SYMBOL_GPL(rcfs_fn);
+
+int rcfs_engine_regd;		/* rcfs state needed by another module */
+EXPORT_SYMBOL_GPL(rcfs_engine_regd);
+
+int rcfs_mounted;
+EXPORT_SYMBOL_GPL(rcfs_mounted);
+
+/*
+ * Helper Functions
+ */
+
+/*
+ * Return TRUE if the given resource is registered.
+ */
+inline unsigned int is_res_regd(struct ckrm_classtype *clstype, int resid)
+{
+	return ((resid >= 0) && (resid < clstype->max_resid) &&
+		test_bit(resid, &clstype->bit_res_ctlrs)
+	    );
+}
+
+/*
+ * Return TRUE if the given core class pointer is valid.
+ */
+struct ckrm_res_ctlr *ckrm_resctlr_lookup(struct ckrm_classtype *clstype,
+					  const char *resname)
+{
+	int resid = -1;
+
+	if (!clstype || !resname) {
+		return NULL;
+	}
+	for (resid = 0; resid < clstype->max_resid; resid++) {
+		if (test_bit(resid, &clstype->bit_res_ctlrs)) {
+			struct ckrm_res_ctlr *rctrl = clstype->res_ctlrs[resid];
+			if (!strncmp(resname, rctrl->res_name,
+				     CKRM_MAX_RES_NAME))
+				return rctrl;
+		}
+	}
+	return NULL;
+}
+
+EXPORT_SYMBOL_GPL(ckrm_resctlr_lookup);
+
+/* given a classname return the class handle and its classtype*/
+void *ckrm_classobj(char *classname, int *classTypeID)
+{
+	int i;
+
+	*classTypeID = -1;
+	if (!classname || !*classname) {
+		return NULL;
+	}
+
+	read_lock(&ckrm_class_lock);
+	for (i = 0; i < CKRM_MAX_CLASSTYPES; i++) {
+		struct ckrm_classtype *ctype = ckrm_classtypes[i];
+		struct ckrm_core_class *core;
+
+		if (ctype == NULL)
+			continue;
+		list_for_each_entry(core, &ctype->classes, clslist) {
+			if (core->name && !strcmp(core->name, classname)) {
+				// FIXME:   should grep reference..
+				read_unlock(&ckrm_class_lock);
+				*classTypeID = ctype->typeID;
+				return core;
+			}
+		}
+	}
+	read_unlock(&ckrm_class_lock);
+	return NULL;
+}
+
+EXPORT_SYMBOL_GPL(is_res_regd);
+EXPORT_SYMBOL_GPL(ckrm_classobj);
+
+/*
+ * Internal Functions/macros
+ */
+
+static inline void set_callbacks_active(struct ckrm_classtype *ctype)
+{
+	ctype->ce_cb_active = ((atomic_read(&ctype->ce_regd) > 0) &&
+			       (ctype->ce_callbacks.always_callback
+				|| (ctype->num_classes > 1)));
+}
+
+int ckrm_validate_and_grab_core(struct ckrm_core_class *core)
+{
+	int rc = 0;
+	read_lock(&ckrm_class_lock);
+	if (likely(ckrm_is_core_valid(core))) {
+		ckrm_core_grab(core);
+		rc = 1;
+	}
+	read_unlock(&ckrm_class_lock);
+	return rc;
+}
+
+/*
+ * Interfaces for classification engine
+ */
+
+/*
+ * Registering a callback structure by the classification engine.
+ *
+ * Returns typeId of class on success -errno for failure.
+ */
+int ckrm_register_engine(const char *typename, ckrm_eng_callback_t * ecbs)
+{
+	struct ckrm_classtype *ctype;
+
+	ctype = ckrm_find_classtype_by_name(typename);
+	if (ctype == NULL)
+		return (-ENOENT);
+
+	atomic_inc(&ctype->ce_regd);
+
+	/* another engine registered or trying to register ? */
+	if (atomic_read(&ctype->ce_regd) != 1) {
+		atomic_dec(&ctype->ce_regd);
+		return (-EBUSY);
+	}
+
+	/*
+	 * One of the following must be set: 
+	 * classify, class_delete (due to object reference) or 
+	 * notify (case where notification supported but not classification)
+	 * The function pointer must be set the momement the mask is non-null
+	 */
+	if (!(((ecbs->classify) && (ecbs->class_delete)) || (ecbs->notify)) ||
+	    (ecbs->c_interest && ecbs->classify == NULL) ||
+	    (ecbs->n_interest && ecbs->notify == NULL)) {
+		atomic_dec(&ctype->ce_regd);
+		return (-EINVAL);
+	}
+
+	ctype->ce_callbacks = *ecbs;
+	set_callbacks_active(ctype);
+
+	if (ctype->ce_callbacks.class_add) {
+		struct ckrm_core_class *core;
+
+		read_lock(&ckrm_class_lock);
+		list_for_each_entry(core, &ctype->classes, clslist) {
+			(*ctype->ce_callbacks.class_add) (core->name, core,
+							  ctype->typeID);
+		}
+		read_unlock(&ckrm_class_lock);
+	}
+	return ctype->typeID;
+}
+
+/*
+ * Unregistering a callback structure by the classification engine.
+ *
+ * Returns 0 on success -errno for failure.
+ */
+int ckrm_unregister_engine(const char *typename)
+{
+	struct ckrm_classtype *ctype;
+
+	ctype = ckrm_find_classtype_by_name(typename);
+	if (ctype == NULL)
+		return (-ENOENT);
+
+	ctype->ce_cb_active = 0;
+	if (atomic_read(&ctype->ce_nr_users) > 1) {
+		/* Somebody is currently using the engine, cannot deregister. */
+		return (-EAGAIN);
+	}
+	atomic_set(&ctype->ce_regd, 0);
+	memset(&ctype->ce_callbacks, 0, sizeof(ckrm_eng_callback_t));
+	return 0;
+}
+
+/*
+ * Interfaces to manipulate class (core or resource) hierarchies
+ */
+
+static void
+ckrm_add_child(struct ckrm_core_class *parent, struct ckrm_core_class *child)
+{
+	struct ckrm_hnode *cnode = &child->hnode;
+
+	if (!ckrm_is_core_valid(child)) {
+		printk(KERN_ERR "Invalid child %p given in ckrm_add_child\n",
+		       child);
+		return;
+	}
+	class_lock(child);
+	INIT_LIST_HEAD(&cnode->children);
+	INIT_LIST_HEAD(&cnode->siblings);
+
+	if (parent) {
+		struct ckrm_hnode *pnode;
+
+		if (!ckrm_is_core_valid(parent)) {
+			printk(KERN_ERR
+			       "Invalid parent %p given in ckrm_add_child\n",
+			       parent);
+			parent = NULL;
+		} else {
+			pnode = &parent->hnode;
+			write_lock(&parent->hnode_rwlock);
+			list_add(&cnode->siblings, &pnode->children);
+			write_unlock(&parent->hnode_rwlock);
+		}
+	}
+	cnode->parent = parent;
+	class_unlock(child);
+	return;
+}
+
+static int ckrm_remove_child(struct ckrm_core_class *child)
+{
+	struct ckrm_hnode *cnode, *pnode;
+	struct ckrm_core_class *parent;
+
+	if (!ckrm_is_core_valid(child)) {
+		printk(KERN_ERR "Invalid child %p given"
+		       		" in ckrm_remove_child\n",
+		       	child);
+		return 0;
+	}
+
+	cnode = &child->hnode;
+	parent = cnode->parent;
+	if (!ckrm_is_core_valid(parent)) {
+		printk(KERN_ERR "Invalid parent %p in ckrm_remove_child\n",
+		       parent);
+		return 0;
+	}
+
+	pnode = &parent->hnode;
+
+	class_lock(child);
+	/* ensure that the node does not have children */
+	if (!list_empty(&cnode->children)) {
+		class_unlock(child);
+		return 0;
+	}
+	write_lock(&parent->hnode_rwlock);
+	list_del(&cnode->siblings);
+	write_unlock(&parent->hnode_rwlock);
+	cnode->parent = NULL;
+	class_unlock(child);
+	return 1;
+}
+
+void ckrm_lock_hier(struct ckrm_core_class *parent)
+{
+	if (ckrm_is_core_valid(parent)) {
+		read_lock(&parent->hnode_rwlock);
+	}
+}
+
+void ckrm_unlock_hier(struct ckrm_core_class *parent)
+{
+	if (ckrm_is_core_valid(parent)) {
+		read_unlock(&parent->hnode_rwlock);
+	}
+}
+
+/*
+ * hnode_rwlock of the parent core class must held in read mode.
+ * external callers should 've called ckrm_lock_hier before calling this
+ * function.
+ */
+#define hnode_2_core(ptr) \
+((ptr)? container_of(ptr, struct ckrm_core_class, hnode) : NULL)
+
+struct ckrm_core_class *ckrm_get_next_child(struct ckrm_core_class *parent,
+					    struct ckrm_core_class *child)
+{
+	struct list_head *cnode;
+	struct ckrm_hnode *next_cnode;
+	struct ckrm_core_class *next_childcore;
+
+	if (!ckrm_is_core_valid(parent)) {
+		printk(KERN_ERR "Invalid parent %p in ckrm_get_next_child\n",
+		       parent);
+		return NULL;
+	}
+	if (list_empty(&parent->hnode.children)) {
+		return NULL;
+	}
+	if (child) {
+		if (!ckrm_is_core_valid(child)) {
+			printk(KERN_ERR
+			       "Invalid child %p in ckrm_get_next_child\n",
+			       child);
+			return NULL;
+		}
+		cnode = child->hnode.siblings.next;
+	} else {
+		cnode = parent->hnode.children.next;
+	}
+
+	if (cnode == &parent->hnode.children) {	/* back at the anchor */
+		return NULL;
+	}
+
+	next_cnode = container_of(cnode, struct ckrm_hnode, siblings);
+	next_childcore = hnode_2_core(next_cnode);
+
+	if (!ckrm_is_core_valid(next_childcore)) {
+		printk(KERN_ERR
+		       "Invalid next child %p in ckrm_get_next_child\n",
+		       next_childcore);
+		return NULL;
+	}
+	return next_childcore;
+}
+
+EXPORT_SYMBOL_GPL(ckrm_lock_hier);
+EXPORT_SYMBOL_GPL(ckrm_unlock_hier);
+EXPORT_SYMBOL_GPL(ckrm_get_next_child);
+
+static void
+ckrm_alloc_res_class(struct ckrm_core_class *core,
+		     struct ckrm_core_class *parent, int resid)
+{
+
+	struct ckrm_classtype *clstype;
+	/* 
+	 * Allocate a resource class only if the resource controller has
+	 * registered with core and the engine requests for the class.
+	 */
+	if (!ckrm_is_core_valid(core))
+		return;
+	clstype = core->classtype;
+	core->res_class[resid] = NULL;
+
+	if (test_bit(resid, &clstype->bit_res_ctlrs)) {
+		ckrm_res_ctlr_t *rcbs;
+
+		atomic_inc(&clstype->nr_resusers[resid]);
+		rcbs = clstype->res_ctlrs[resid];
+
+		if (rcbs && rcbs->res_alloc) {
+			core->res_class[resid] =
+			    (*rcbs->res_alloc) (core, parent);
+			if (core->res_class[resid])
+				return;
+			printk(KERN_ERR "Error creating res class\n");
+		}
+		atomic_dec(&clstype->nr_resusers[resid]);
+	}
+}
+
+/*
+ * Initialize a core class
+ *
+ */
+
+#define CLS_DEBUG(fmt, args...) \
+do { /* printk("%s: " fmt, __FUNCTION__ , ## args); */ } while (0)
+
+int
+ckrm_init_core_class(struct ckrm_classtype *clstype,
+		     struct ckrm_core_class *dcore,
+		     struct ckrm_core_class *parent, const char *name)
+{
+	/* TODO:  Should replace name with dentry or add dentry? */
+	int i;
+
+	/* TODO:  How is this used in initialization? */
+	CLS_DEBUG("name %s => %p\n", name ? name : "default", dcore);
+	if ((dcore != clstype->default_class) && (!ckrm_is_core_valid(parent))){
+		printk("error not a valid parent %p\n", parent);
+		return -EINVAL;
+	}
+	dcore->classtype = clstype;
+	dcore->magic = CKRM_CORE_MAGIC;
+	dcore->name = name;
+	dcore->class_lock = SPIN_LOCK_UNLOCKED;
+	dcore->hnode_rwlock = RW_LOCK_UNLOCKED;
+	dcore->delayed = 0;
+
+	atomic_set(&dcore->refcnt, 0);
+	write_lock(&ckrm_class_lock);
+
+	INIT_LIST_HEAD(&dcore->objlist);
+	list_add_tail(&dcore->clslist, &clstype->classes);
+
+	clstype->num_classes++;
+	set_callbacks_active(clstype);
+
+	write_unlock(&ckrm_class_lock);
+	ckrm_add_child(parent, dcore);
+
+	for (i = 0; i < clstype->max_resid; i++)
+		ckrm_alloc_res_class(dcore, parent, i);
+
+	/* fix for race condition seen in stress with numtasks */
+	if (parent)
+		ckrm_core_grab(parent);
+
+	ckrm_core_grab(dcore);
+	return 0;
+}
+
+static void ckrm_free_res_class(struct ckrm_core_class *core, int resid)
+{
+	/* 
+	 * Free a resource class only if the resource controller has
+	 * registered with core 
+	 */
+	if (core->res_class[resid]) {
+		ckrm_res_ctlr_t *rcbs;
+		struct ckrm_classtype *clstype = core->classtype;
+
+		atomic_inc(&clstype->nr_resusers[resid]);
+		rcbs = clstype->res_ctlrs[resid];
+
+		if (rcbs->res_free) {
+			(*rcbs->res_free) (core->res_class[resid]);
+			// compensate inc in alloc
+			atomic_dec(&clstype->nr_resusers[resid]); 
+		}
+		atomic_dec(&clstype->nr_resusers[resid]);
+	}
+	core->res_class[resid] = NULL;
+}
+
+/*
+ * Free a core class 
+ *   requires that all tasks were previously reassigned to another class
+ *
+ * Returns 0 on success -errno on failure.
+ */
+
+void ckrm_free_core_class(struct ckrm_core_class *core)
+{
+	int i;
+	struct ckrm_classtype *clstype = core->classtype;
+	struct ckrm_core_class *parent = core->hnode.parent;
+
+	CLS_DEBUG("core=%p:%s parent=%p:%s\n", core, core->name, parent,
+		  parent->name);
+	if (core->delayed) {
+		/* this core was marked as late */
+		printk("class <%s> finally deleted %lu\n", core->name, jiffies);
+	}
+	if (ckrm_remove_child(core) == 0) {
+		printk("Core class removal failed. Chilren present\n");
+	}
+	for (i = 0; i < clstype->max_resid; i++) {
+		ckrm_free_res_class(core, i);
+	}
+
+	write_lock(&ckrm_class_lock);
+	/* Clear the magic, so we would know if this core is reused. */
+	core->magic = 0;
+#if 0				/* Dynamic not yet enabled */
+	core->res_class = NULL;
+#endif
+	/* Remove this core class from its linked list. */
+	list_del(&core->clslist);
+	clstype->num_classes--;
+	set_callbacks_active(clstype);
+	write_unlock(&ckrm_class_lock);
+
+	/* fix for race condition seen in stress with numtasks */
+	if (parent)
+		ckrm_core_drop(parent);
+
+	kfree(core);
+}
+
+int ckrm_release_core_class(struct ckrm_core_class *core)
+{
+	if (!ckrm_is_core_valid(core)) {
+		// Invalid core
+		return (-EINVAL);
+	}
+
+	if (core == core->classtype->default_class)
+		return 0;
+
+	/* need to make sure that the classgot really dropped */
+	if (atomic_read(&core->refcnt) != 1) {
+		CLS_DEBUG("class <%s> deletion delayed refcnt=%d jif=%ld\n",
+			  core->name, atomic_read(&core->refcnt), jiffies);
+		core->delayed = 1;	/* just so we have a ref point */
+	}
+	ckrm_core_drop(core);
+	return 0;
+}
+
+/*
+ * Interfaces for the resource controller
+ */
+/*
+ * Registering a callback structure by the resource controller.
+ *
+ * Returns the resource id(0 or +ve) on success, -errno for failure.
+ */
+static int
+ckrm_register_res_ctlr_intern(struct ckrm_classtype *clstype,
+			      ckrm_res_ctlr_t * rcbs)
+{
+	int resid, ret, i;
+
+	if (!rcbs)
+		return -EINVAL;
+
+	resid = rcbs->resid;
+
+	spin_lock(&clstype->res_ctlrs_lock);
+	printk(KERN_WARNING "resid is %d name is %s %s\n",
+	       resid, rcbs->res_name, clstype->res_ctlrs[resid]->res_name);
+	if (resid >= 0) {
+		if ((resid < CKRM_MAX_RES_CTLRS)
+		    && (clstype->res_ctlrs[resid] == NULL)) {
+			clstype->res_ctlrs[resid] = rcbs;
+			atomic_set(&clstype->nr_resusers[resid], 0);
+			set_bit(resid, &clstype->bit_res_ctlrs);
+			ret = resid;
+			if (resid >= clstype->max_resid) {
+				clstype->max_resid = resid + 1;
+			}
+		} else {
+			ret = -EBUSY;
+		}
+		spin_unlock(&clstype->res_ctlrs_lock);
+		return ret;
+	}
+	for (i = clstype->resid_reserved; i < clstype->max_res_ctlrs; i++) {
+		if (clstype->res_ctlrs[i] == NULL) {
+			clstype->res_ctlrs[i] = rcbs;
+			rcbs->resid = i;
+			atomic_set(&clstype->nr_resusers[i], 0);
+			set_bit(i, &clstype->bit_res_ctlrs);
+			if (i >= clstype->max_resid) {
+				clstype->max_resid = i + 1;
+			}
+			spin_unlock(&clstype->res_ctlrs_lock);
+			return i;
+		}
+	}
+	spin_unlock(&clstype->res_ctlrs_lock);
+	return (-ENOMEM);
+}
+
+int
+ckrm_register_res_ctlr(struct ckrm_classtype *clstype, ckrm_res_ctlr_t * rcbs)
+{
+	struct ckrm_core_class *core;
+	int resid;
+
+	resid = ckrm_register_res_ctlr_intern(clstype, rcbs);
+
+	if (resid >= 0) {
+		/* run through all classes and create the resource class 
+		 * object and if necessary "initialize" class in context 
+		 * of this resource 
+		 */
+		read_lock(&ckrm_class_lock);
+		list_for_each_entry(core, &clstype->classes, clslist) {
+			printk("CKRM .. create res clsobj for resouce <%s>"
+			       "class <%s> par=%p\n", rcbs->res_name, 
+			       core->name, core->hnode.parent);
+			ckrm_alloc_res_class(core, core->hnode.parent, resid);
+
+			if (clstype->add_resctrl) { 
+				/* FIXME: this should be mandatory */
+				(*clstype->add_resctrl) (core, resid);
+			}
+		}
+		read_unlock(&ckrm_class_lock);
+	}
+	return resid;
+}
+
+/*
+ * Unregistering a callback structure by the resource controller.
+ *
+ * Returns 0 on success -errno for failure.
+ */
+int ckrm_unregister_res_ctlr(struct ckrm_res_ctlr *rcbs)
+{
+	struct ckrm_classtype *clstype = rcbs->classtype;
+	struct ckrm_core_class *core = NULL;
+	int resid = rcbs->resid;
+
+	if ((clstype == NULL) || (resid < 0)) {
+		return -EINVAL;
+	}
+	/* TODO: probably need to also call deregistration function */
+
+	read_lock(&ckrm_class_lock);
+	/* free up this resource from all the classes */
+	list_for_each_entry(core, &clstype->classes, clslist) {
+		ckrm_free_res_class(core, resid);
+	}
+	read_unlock(&ckrm_class_lock);
+
+	if (atomic_read(&clstype->nr_resusers[resid])) {
+		return -EBUSY;
+	}
+
+	spin_lock(&clstype->res_ctlrs_lock);
+	clstype->res_ctlrs[resid] = NULL;
+	clear_bit(resid, &clstype->bit_res_ctlrs);
+	clstype->max_resid = fls(clstype->bit_res_ctlrs);
+	rcbs->resid = -1;
+	spin_unlock(&clstype->res_ctlrs_lock);
+
+	return 0;
+}
+
+/*
+ * Class Type Registration
+ */
+
+/* TODO: What locking is needed here?*/
+
+struct ckrm_classtype *ckrm_classtypes[CKRM_MAX_CLASSTYPES];
+EXPORT_SYMBOL_GPL(ckrm_classtypes);	
+
+int ckrm_register_classtype(struct ckrm_classtype *clstype)
+{
+	int tid = clstype->typeID;
+
+	if (tid != -1) {
+		if ((tid < 0) || (tid > CKRM_MAX_CLASSTYPES)
+		    || (ckrm_classtypes[tid]))
+			return -EINVAL;
+	} else {
+		int i;
+		for (i = CKRM_RESV_CLASSTYPES; i < CKRM_MAX_CLASSTYPES; i++) {
+			if (ckrm_classtypes[i] == NULL) {
+				tid = i;
+				break;
+			}
+		}
+	}
+	if (tid == -1)
+		return -EBUSY;
+	clstype->typeID = tid;
+	ckrm_classtypes[tid] = clstype;
+
+	/* TODO: Need to call the callbacks of the RCFS client */
+	if (rcfs_fn.register_classtype) {
+		(*rcfs_fn.register_classtype) (clstype);
+		/* No error return for now. */
+	}
+	return tid;
+}
+
+int ckrm_unregister_classtype(struct ckrm_classtype *clstype)
+{
+	int tid = clstype->typeID;
+
+	if ((tid < 0) || (tid > CKRM_MAX_CLASSTYPES)
+	    || (ckrm_classtypes[tid] != clstype))
+		return -EINVAL;
+
+	if (rcfs_fn.deregister_classtype) {
+		(*rcfs_fn.deregister_classtype) (clstype);
+		// No error return for now
+	}
+
+	ckrm_classtypes[tid] = NULL;
+	clstype->typeID = -1;
+	return 0;
+}
+
+struct ckrm_classtype *ckrm_find_classtype_by_name(const char *name)
+{
+	int i;
+	for (i = 0; i < CKRM_MAX_CLASSTYPES; i++) {
+		struct ckrm_classtype *ctype = ckrm_classtypes[i];
+		if (ctype && !strncmp(ctype->name, name, CKRM_MAX_TYPENAME_LEN))
+			return ctype;
+	}
+	return NULL;
+}
+
+/*
+ *   Generic Functions that can be used as default functions 
+ *   in almost all classtypes
+ *     (a) function iterator over all resource classes of a class
+ *     (b) function invoker on a named resource
+ */
+
+int ckrm_class_show_shares(struct ckrm_core_class *core, struct seq_file *seq)
+{
+	int i;
+	struct ckrm_res_ctlr *rcbs;
+	struct ckrm_classtype *clstype = core->classtype;
+	struct ckrm_shares shares;
+
+	for (i = 0; i < clstype->max_resid; i++) {
+		atomic_inc(&clstype->nr_resusers[i]);
+		rcbs = clstype->res_ctlrs[i];
+		if (rcbs && rcbs->get_share_values) {
+			(*rcbs->get_share_values) (core->res_class[i], &shares);
+			seq_printf(seq,"res=%s,guarantee=%d,limit=%d,"
+				   "total_guarantee=%d,max_limit=%d\n",
+				   rcbs->res_name, shares.my_guarantee,
+				   shares.my_limit, shares.total_guarantee,
+				   shares.max_limit);
+		}
+		atomic_dec(&clstype->nr_resusers[i]);
+	}
+	return 0;
+}
+
+int ckrm_class_show_stats(struct ckrm_core_class *core, struct seq_file *seq)
+{
+	int i;
+	struct ckrm_res_ctlr *rcbs;
+	struct ckrm_classtype *clstype = core->classtype;
+
+	for (i = 0; i < clstype->max_resid; i++) {
+		atomic_inc(&clstype->nr_resusers[i]);
+		rcbs = clstype->res_ctlrs[i];
+		if (rcbs && rcbs->get_stats)
+			(*rcbs->get_stats) (core->res_class[i], seq);
+		atomic_dec(&clstype->nr_resusers[i]);
+	}
+	return 0;
+}
+
+int ckrm_class_show_config(struct ckrm_core_class *core, struct seq_file *seq)
+{
+	int i;
+	struct ckrm_res_ctlr *rcbs;
+	struct ckrm_classtype *clstype = core->classtype;
+
+	for (i = 0; i < clstype->max_resid; i++) {
+		atomic_inc(&clstype->nr_resusers[i]);
+		rcbs = clstype->res_ctlrs[i];
+		if (rcbs && rcbs->show_config)
+			(*rcbs->show_config) (core->res_class[i], seq);
+		atomic_dec(&clstype->nr_resusers[i]);
+	}
+	return 0;
+}
+
+int ckrm_class_set_config(struct ckrm_core_class *core, const char *resname,
+			  const char *cfgstr)
+{
+	struct ckrm_classtype *clstype = core->classtype;
+	struct ckrm_res_ctlr *rcbs = ckrm_resctlr_lookup(clstype, resname);
+	int rc;
+
+	if (rcbs == NULL || rcbs->set_config == NULL)
+		return -EINVAL;
+	rc = (*rcbs->set_config) (core->res_class[rcbs->resid], cfgstr);
+	return rc;
+}
+
+#define legalshare(a)   \
+         ( ((a) >=0) \
+	   || ((a) == CKRM_SHARE_UNCHANGED) \
+	   || ((a) == CKRM_SHARE_DONTCARE) )
+
+int ckrm_class_set_shares(struct ckrm_core_class *core, const char *resname,
+			  struct ckrm_shares *shares)
+{
+	struct ckrm_classtype *clstype = core->classtype;
+	struct ckrm_res_ctlr *rcbs;
+	int rc;
+
+	/* Check for legal values */
+	if (!legalshare(shares->my_guarantee) || !legalshare(shares->my_limit)
+	    || !legalshare(shares->total_guarantee)
+	    || !legalshare(shares->max_limit))
+		return -EINVAL;
+
+	rcbs = ckrm_resctlr_lookup(clstype, resname);
+	if (rcbs == NULL || rcbs->set_share_values == NULL)
+		return -EINVAL;
+	rc = (*rcbs->set_share_values) (core->res_class[rcbs->resid], shares);
+	return rc;
+}
+
+int ckrm_class_reset_stats(struct ckrm_core_class *core, const char *resname,
+			   const char *unused)
+{
+	struct ckrm_classtype *clstype = core->classtype;
+	struct ckrm_res_ctlr *rcbs = ckrm_resctlr_lookup(clstype, resname);
+	int rc;
+
+	if (rcbs == NULL || rcbs->reset_stats == NULL)
+		return -EINVAL;
+	rc = (*rcbs->reset_stats) (core->res_class[rcbs->resid]);
+	return rc;
+}
+
+/*
+ * Initialization
+ */
+
+void ckrm_cb_newtask(struct task_struct *tsk)
+{
+	tsk->ce_data = NULL;
+	spin_lock_init(&tsk->ckrm_tsklock);
+	ckrm_invoke_event_cb_chain(CKRM_EVENT_NEWTASK, tsk);
+}
+
+void ckrm_cb_exit(struct task_struct *tsk)
+{
+	ckrm_invoke_event_cb_chain(CKRM_EVENT_EXIT, tsk);
+	tsk->ce_data = NULL;
+}
+
+void __init ckrm_init(void)
+{
+	printk("CKRM Initialization\n");
+
+	// register/initialize the Metatypes
+
+#ifdef CONFIG_CKRM_TYPE_TASKCLASS
+	{
+		extern void ckrm_meta_init_taskclass(void);
+		ckrm_meta_init_taskclass();
+	}
+#endif
+#ifdef CONFIG_CKRM_TYPE_SOCKETCLASS
+	{
+		extern void ckrm_meta_init_sockclass(void);
+		ckrm_meta_init_sockclass();
+	}
+#endif
+	// prepare init_task and then rely on inheritance of properties
+	ckrm_cb_newtask(&init_task);
+	printk("CKRM Initialization done\n");
+}
+
+EXPORT_SYMBOL_GPL(ckrm_register_engine);
+EXPORT_SYMBOL_GPL(ckrm_unregister_engine);
+
+EXPORT_SYMBOL_GPL(ckrm_register_res_ctlr);
+EXPORT_SYMBOL_GPL(ckrm_unregister_res_ctlr);
+
+EXPORT_SYMBOL_GPL(ckrm_init_core_class);
+EXPORT_SYMBOL_GPL(ckrm_free_core_class);
+EXPORT_SYMBOL_GPL(ckrm_release_core_class);
+
+EXPORT_SYMBOL_GPL(ckrm_register_classtype);
+EXPORT_SYMBOL_GPL(ckrm_unregister_classtype);
+EXPORT_SYMBOL_GPL(ckrm_find_classtype_by_name);
+
+EXPORT_SYMBOL_GPL(ckrm_core_grab);
+EXPORT_SYMBOL_GPL(ckrm_core_drop);
+EXPORT_SYMBOL_GPL(ckrm_is_core_valid);
+EXPORT_SYMBOL_GPL(ckrm_validate_and_grab_core);
+
+EXPORT_SYMBOL_GPL(ckrm_register_event_set);
+EXPORT_SYMBOL_GPL(ckrm_unregister_event_set);
+EXPORT_SYMBOL_GPL(ckrm_register_event_cb);
+EXPORT_SYMBOL_GPL(ckrm_unregister_event_cb);
+
+EXPORT_SYMBOL_GPL(ckrm_class_show_stats);
+EXPORT_SYMBOL_GPL(ckrm_class_show_config);
+EXPORT_SYMBOL_GPL(ckrm_class_show_shares);
+
+EXPORT_SYMBOL_GPL(ckrm_class_set_config);
+EXPORT_SYMBOL_GPL(ckrm_class_set_shares);
+
+EXPORT_SYMBOL_GPL(ckrm_class_reset_stats);
Index: linux-2.6.10-rc2/kernel/ckrm/ckrmutils.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.10-rc2/kernel/ckrm/ckrmutils.c	2004-11-19 20:43:43.524211803 -0800
@@ -0,0 +1,195 @@
+/*
+ * ckrmutils.c - Utility functions for CKRM
+ *
+ * Copyright (C) Chandra Seetharaman,  IBM Corp. 2003
+ *           (C) Hubertus Franke    ,  IBM Corp. 2004
+ * 
+ * Provides simple utility functions for the core module, CE and resource
+ * controllers.
+ *
+ * Latest version, more details at http://ckrm.sf.net
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+/*
+ * Changes
+ * 
+ * 13 Nov 2003
+ *        Created
+ */
+
+#include <linux/mm.h>
+#include <linux/err.h>
+#include <linux/mount.h>
+#include <linux/module.h>
+#include <linux/ckrm_rc.h>
+
+int get_exe_path_name(struct task_struct *tsk, char *buf, int buflen)
+{
+	struct vm_area_struct *vma;
+	struct vfsmount *mnt;
+	struct mm_struct *mm = get_task_mm(tsk);
+	struct dentry *dentry;
+	char *lname;
+	int rc = 0;
+
+	*buf = '\0';
+	if (!mm) {
+		return -EINVAL;
+	}
+	down_read(&mm->mmap_sem);
+	vma = mm->mmap;
+	while (vma) {
+		if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) {
+			dentry = dget(vma->vm_file->f_dentry);
+			mnt = mntget(vma->vm_file->f_vfsmnt);
+			lname = d_path(dentry, mnt, buf, buflen);
+			if (!IS_ERR(lname)) {
+				strncpy(buf, lname, strlen(lname) + 1);
+			} else {
+				rc = (int)PTR_ERR(lname);
+			}
+			mntput(mnt);
+			dput(dentry);
+			break;
+		}
+		vma = vma->vm_next;
+	}
+	up_read(&mm->mmap_sem);
+	mmput(mm);
+	return rc;
+}
+
+/*
+ * must be called with cnt_lock of parres held
+ * Caller is responsible for making sure that the new guarantee doesn't
+ * overflow parent's total guarantee.
+ */
+void child_guarantee_changed(struct ckrm_shares *parent, int cur, int new)
+{
+	if (new == cur || !parent) {
+		return;
+	}
+	if (new != CKRM_SHARE_DONTCARE) {
+		parent->unused_guarantee -= new;
+	}
+	if (cur != CKRM_SHARE_DONTCARE) {
+		parent->unused_guarantee += cur;
+	}
+	return;
+}
+
+/*
+ * must be called with cnt_lock of parres held
+ * Caller is responsible for making sure that the new limit is not more 
+ * than parent's max_limit
+ */
+void child_maxlimit_changed(struct ckrm_shares *parent, int new_limit)
+{
+	if (parent && parent->cur_max_limit < new_limit) {
+		parent->cur_max_limit = new_limit;
+	}
+	return;
+}
+
+/*
+ * Caller is responsible for holding any lock to protect the data
+ * structures passed to this function
+ */
+int
+set_shares(struct ckrm_shares *new, struct ckrm_shares *cur,
+	   struct ckrm_shares *par)
+{
+	int rc = -EINVAL;
+	int cur_usage_guar = cur->total_guarantee - cur->unused_guarantee;
+	int increase_by = new->my_guarantee - cur->my_guarantee;
+
+	/* Check total_guarantee for correctness */
+	if (new->total_guarantee <= CKRM_SHARE_DONTCARE) {
+		goto set_share_err;
+	} else if (new->total_guarantee == CKRM_SHARE_UNCHANGED) {
+		/* do nothing */;
+	} else if (cur_usage_guar > new->total_guarantee) {
+		goto set_share_err;
+	}
+	/* Check max_limit for correctness */
+	if (new->max_limit <= CKRM_SHARE_DONTCARE) {
+		goto set_share_err;
+	} else if (new->max_limit == CKRM_SHARE_UNCHANGED) {
+		/* do nothing */;
+	} else if (cur->cur_max_limit > new->max_limit) {
+		goto set_share_err;
+	}
+	/* Check my_guarantee for correctness */
+	if (new->my_guarantee == CKRM_SHARE_UNCHANGED) {
+		/* do nothing */;
+	} else if (new->my_guarantee == CKRM_SHARE_DONTCARE) {
+		/* do nothing */;
+	} else if (par && increase_by > par->unused_guarantee) {
+		goto set_share_err;
+	}
+	/* Check my_limit for correctness */
+	if (new->my_limit == CKRM_SHARE_UNCHANGED) {
+		/* do nothing */;
+	} else if (new->my_limit == CKRM_SHARE_DONTCARE) {
+		/* do nothing */;
+	} else if (par && new->my_limit > par->max_limit) {
+		/* I can't get more limit than my parent's limit */
+		goto set_share_err;
+
+	}
+	/* make sure guarantee is lesser than limit */
+	if (new->my_limit == CKRM_SHARE_DONTCARE) {
+		/* do nothing */;
+	} else if (new->my_limit == CKRM_SHARE_UNCHANGED) {
+		if (new->my_guarantee == CKRM_SHARE_DONTCARE) {
+			/* do nothing */;
+		} else if (new->my_guarantee == CKRM_SHARE_UNCHANGED) {
+			/*
+			 * do nothing; earlier setting would have
+			 * taken care of it
+			 */;
+		} else if (new->my_guarantee > cur->my_limit) {
+			goto set_share_err;
+		}
+	} else {		/* new->my_limit has a valid value */
+		if (new->my_guarantee == CKRM_SHARE_DONTCARE) {
+			/* do nothing */;
+		} else if (new->my_guarantee == CKRM_SHARE_UNCHANGED) {
+			if (cur->my_guarantee > new->my_limit) {
+				goto set_share_err;
+			}
+		} else if (new->my_guarantee > new->my_limit) {
+			goto set_share_err;
+		}
+	}
+	if (new->my_guarantee != CKRM_SHARE_UNCHANGED) {
+		child_guarantee_changed(par, cur->my_guarantee,
+					new->my_guarantee);
+		cur->my_guarantee = new->my_guarantee;
+	}
+	if (new->my_limit != CKRM_SHARE_UNCHANGED) {
+		child_maxlimit_changed(par, new->my_limit);
+		cur->my_limit = new->my_limit;
+	}
+	if (new->total_guarantee != CKRM_SHARE_UNCHANGED) {
+		cur->unused_guarantee = new->total_guarantee - cur_usage_guar;
+		cur->total_guarantee = new->total_guarantee;
+	}
+	if (new->max_limit != CKRM_SHARE_UNCHANGED) {
+		cur->max_limit = new->max_limit;
+	}
+	rc = 0;
+set_share_err:
+	return rc;
+}
+
+EXPORT_SYMBOL_GPL(get_exe_path_name);
+EXPORT_SYMBOL_GPL(child_guarantee_changed);
+EXPORT_SYMBOL_GPL(child_maxlimit_changed);
+EXPORT_SYMBOL_GPL(set_shares);
Index: linux-2.6.10-rc2/kernel/ckrm/Makefile
===================================================================
--- linux-2.6.10-rc2.orig/kernel/ckrm/Makefile	2004-11-19 20:40:52.528302080 -0800
+++ linux-2.6.10-rc2/kernel/ckrm/Makefile	2004-11-19 20:43:43.526211486 -0800
@@ -3,5 +3,5 @@
 #
 
 ifeq ($(CONFIG_CKRM),y)
-    obj-y = ckrm_events.o
+    obj-y = ckrm_events.o ckrm.o ckrmutils.o
 endif	

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] CKRM: 3/10 CKRM:  Core ckrm, rcfs
  2004-11-29 18:47 [PATCH] CKRM: 3/10 CKRM: Core ckrm, rcfs Gerrit Huizenga
@ 2004-11-29 22:00 ` Greg KH
  2004-11-29 22:28   ` Roland Dreier
                     ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Greg KH @ 2004-11-29 22:00 UTC (permalink / raw)
  To: Gerrit Huizenga; +Cc: linux-kernel, akpm, Rik van Riel, Chris Mason, ckrm-tech

On Mon, Nov 29, 2004 at 10:47:32AM -0800, Gerrit Huizenga wrote:
> +/* Changes
> + *
> + * 12 Nov 2003
> + *        Created.
> + * 22 Apr 2004
> + *        Adopted to classtypes
> + */

Ok, I'm not going to say this for every future file... :)

> +#ifdef __KERNEL__

Not needed.

> +typedef void *(*ce_classify_fct_t) (enum ckrm_event event, void *obj, ...);
> +typedef void (*ce_notify_fct_t) (enum ckrm_event event, void *classobj,
> +				 void *obj);

Ick.  Don't put a _t at the end of a typedef.  Wrong OS style guide.

> +typedef struct ckrm_eng_callback {

no typedef.

> +	/* general state information */
> +	int always_callback;	/* set if CE should always be called back 
> +				   regardless of numclasses */
> +
> +	/* callbacks which are called without holding locks */
> +
> +	unsigned long c_interest;	/* set of classification events of 
> +					 * interest to CE 
> +					 */
> +
> +	/* generic classify */
> +	ce_classify_fct_t classify;
> +
> +	/* class added */
> +	void (*class_add) (const char *name, void *core, int classtype);
> +
> +	/* class deleted */
> +	void (*class_delete) (const char *name, void *core, int classtype);
> +
> +	/* callbacks which are called while holding task_lock(tsk) */
> +	unsigned long n_interest;	/* set of notification events of 
> +					 *  interest to CE 
> +					 */
> +	/* notify on class switch */
> +	ce_notify_fct_t notify;	
> +} ckrm_eng_callback_t;

Especially one that ends in _t again :(

> +struct inode;
> +struct dentry;
> +
> +typedef struct rbce_eng_callback {
> +	int (*mkdir) (struct inode *, struct dentry *, int);	/* mkdir */
> +	int (*rmdir) (struct inode *, struct dentry *);		/* rmdir */
> +	int (*mnt) (void);
> +	int (*umnt) (void);
> +} rbce_eng_callback_t;

Again with the unneeded typedef.  Come on Gerrit, you should know
better...

> +extern int ckrm_register_engine(const char *name, ckrm_eng_callback_t *);
> +extern int ckrm_unregister_engine(const char *name);
> +
> +extern void *ckrm_classobj(char *, int *classtype);
> +extern int get_exe_path_name(struct task_struct *t, char *filename,
> +			     int max_size);

Wasn't this function in some other header file already?

> +
> +extern int rcfs_register_engine(rbce_eng_callback_t *);
> +extern int rcfs_unregister_engine(rbce_eng_callback_t *);
> +
> +extern int ckrm_reclassify(int pid);
> +
> +#ifndef _LINUX_CKRM_RC_H
> +
> +extern void ckrm_core_grab(void *);
> +extern void ckrm_core_drop(void *);

void *?  You can't use a proper type?

> +typedef struct ckrm_shares {
> +	int my_guarantee;
> +	int my_limit;
> +	int total_guarantee;
> +	int max_limit;
> +	int unused_guarantee;	/* not used as parameters */
> +	int cur_max_limit;	/* not used as parameters */
> +} ckrm_shares_t;

Consider this the last of the "no more typedefs except for function
pointers" reminders for the rest of the code base.

> +
> +#define CKRM_SHARE_UNCHANGED     (-1)	
> +#define CKRM_SHARE_DONTCARE      (-2)	
> +#define CKRM_SHARE_DFLT_TOTAL_GUARANTEE (100) 
> +#define CKRM_SHARE_DFLT_MAX_LIMIT     (100)  

Trailing whitespace that is a tab, but yet, no tab within the define
itself.  Odd creature.

> +#define CKRM_CORE_MAGIC		0xBADCAFFE

"Magic" checks should not be needed at all.  Please drop them all.
> +typedef struct ckrm_hnode {
> +	struct ckrm_core_class *parent;
> +	struct list_head siblings;	
> +	struct list_head children;	
> +} ckrm_hnode_t;

I'm going to be over here in the corner, sobbing into my old CodingStyle
presentations that I know I forced you to sit through a number of
times... {sniff}

> +#define ckrm_get_res_class(rescls, resid, type) \
> +	((type*) (((resid != -1) && ((rescls) != NULL) \
> +			   && ((rescls) != (void *)-1)) ? \
> +	 ((struct ckrm_core_class *)(rescls))->res_class[resid] : NULL))

What exactly are you trying to do with this macro?  Cast to see if a
pointer is not -1?  That doesn't sound very safe...

> +static inline void ckrm_core_grab(struct ckrm_core_class *core)
> +{
> +	if (core)
> +		atomic_inc(&core->refcnt);
> +}

Please just use kref, don't invent your own reference counting.

> +/*
> + * iterate through all associate resource controllers:
> + * requires following arguments (ckrm_core_class *cls, 
> + *                               ckrm_res_ctrl   *ctlr,
> + *                               void            *robj,
> + *                               int              bmap)
> + */
> +
> +#define forall_class_resobjs(cls,rcbs,robj,bmap)			\
> +       for ( bmap=((cls->classtype)->bit_res_ctlrs) ;			\
> +	     ({ int rid; ((rid=ffs(bmap)-1) >= 0) &&			\
> +	                 (bmap &= ~(1<<rid),				\
> +				((rcbs=cls->classtype->res_ctlrs[rid])	\
> +				 && (robj=cls->res_class[rid]))); });	\
> +           )

Use kerneldoc comments if you are going to take the time to actually
document a macro.

> +	ckrm_init();

Can't just make it an initcall?  What's wrong with the existing 7 levels
that we have?

> +#include <linux/config.h>
> +#include <linux/init.h>
> +#include <linux/linkage.h>
> +#include <linux/kernel.h>
> +#include <linux/errno.h>
> +#include <asm/uaccess.h>
> +#include <linux/mm.h>
> +#include <asm/errno.h>
> +#include <linux/string.h>
> +#include <linux/list.h>
> +#include <linux/spinlock.h>
> +#include <linux/module.h>
> +#include <linux/ckrm_rc.h>
> +#include <linux/rcfs.h>
> +#include <net/sock.h>
> +#include <linux/ip.h>

asm includes after the regular ones please.

> +rwlock_t ckrm_class_lock = RW_LOCK_UNLOCKED;	/* protects classlists */

Isn't there a new way to define locks in an initialized state?  Ah yes,
this should use rwlock_init() instead.

> +struct rcfs_functions rcfs_fn;
> +EXPORT_SYMBOL_GPL(rcfs_fn);

I don't understand.  Portions of ckrm are released under the LGPL, while
others are under the GPL?  Why the difference?

> +/*
> + * Return TRUE if the given resource is registered.
> + */

What is "TRUE"?  True in the kernel sense is usually 0 :)

> +struct ckrm_res_ctlr *ckrm_resctlr_lookup(struct ckrm_classtype *clstype,
> +					  const char *resname)
> +{
> +	int resid = -1;
> +
> +	if (!clstype || !resname) {
> +		return NULL;
> +	}

No extra {} needed.

> +/* given a classname return the class handle and its classtype*/
> +void *ckrm_classobj(char *classname, int *classTypeID)

Why not use proper kerneldoc form of comments if you are going to try to
document the api?

> +	*classTypeID = -1;

MixedCaseVariablesAreNotTheLinuxWay.
PleaseReadDocumentationCodingStyleYetAgain.

> +EXPORT_SYMBOL_GPL(is_res_regd);

Not the nicest global symbol.  Why not put ckrm at the front?

> +/*
> + * Registering a callback structure by the classification engine.
> + *
> + * Returns typeId of class on success -errno for failure.
> + */
> +int ckrm_register_engine(const char *typename, ckrm_eng_callback_t * ecbs)
> +{
> +	struct ckrm_classtype *ctype;
> +
> +	ctype = ckrm_find_classtype_by_name(typename);
> +	if (ctype == NULL)
> +		return (-ENOENT);
> +
> +	atomic_inc(&ctype->ce_regd);
> +
> +	/* another engine registered or trying to register ? */
> +	if (atomic_read(&ctype->ce_regd) != 1) {
> +		atomic_dec(&ctype->ce_regd);
> +		return (-EBUSY);
> +	}

Why not just use a lock if you are worried about this?

> +int ckrm_unregister_engine(const char *typename)
> +{
> +	struct ckrm_classtype *ctype;
> +
> +	ctype = ckrm_find_classtype_by_name(typename);
> +	if (ctype == NULL)
> +		return (-ENOENT);

return is not a function.

> +#define CLS_DEBUG(fmt, args...) \
> +do { /* printk("%s: " fmt, __FUNCTION__ , ## args); */ } while (0)

Again, use pr_debug() please.

> +int
> +ckrm_init_core_class(struct ckrm_classtype *clstype,
> +		     struct ckrm_core_class *dcore,
> +		     struct ckrm_core_class *parent, const char *name)
> +{
> +	/* TODO:  Should replace name with dentry or add dentry? */

Shouldn't the TODO's be done by now?


> +	int i;
> +
> +	/* TODO:  How is this used in initialization? */

This makes me feel warm and fuzzy...

> +	CLS_DEBUG("name %s => %p\n", name ? name : "default", dcore);
> +	if ((dcore != clstype->default_class) && (!ckrm_is_core_valid(parent))){
> +		printk("error not a valid parent %p\n", parent);

printk() always needs a KERN_ value or the kernel janitors will come
running after you with their pitchforks.  You do this in a lot of
different places, please fix all of them.

> +	if ((clstype == NULL) || (resid < 0)) {
> +		return -EINVAL;
> +	}

Again, {} not needed here, and in lots of other single statment if
lines.  Please fix them all.

> +	/* TODO: probably need to also call deregistration function */

Well, do you? :)

> +	/* TODO: Need to call the callbacks of the RCFS client */
> +	if (rcfs_fn.register_classtype) {
> +		(*rcfs_fn.register_classtype) (clstype);
> +		/* No error return for now. */

Why no error return?

> +	}
> +	return tid;
> +}
> +
> +int ckrm_unregister_classtype(struct ckrm_classtype *clstype)
> +{
> +	int tid = clstype->typeID;
> +
> +	if ((tid < 0) || (tid > CKRM_MAX_CLASSTYPES)
> +	    || (ckrm_classtypes[tid] != clstype))
> +		return -EINVAL;
> +
> +	if (rcfs_fn.deregister_classtype) {
> +		(*rcfs_fn.deregister_classtype) (clstype);
> +		// No error return for now

Again, why no error?

> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ linux-2.6.10-rc2/kernel/ckrm/ckrmutils.c	2004-11-19 20:43:43.524211803 -0800
> @@ -0,0 +1,195 @@
> +/*
> + * ckrmutils.c - Utility functions for CKRM
> + *
> + * Copyright (C) Chandra Seetharaman,  IBM Corp. 2003
> + *           (C) Hubertus Franke    ,  IBM Corp. 2004
> + * 
> + * Provides simple utility functions for the core module, CE and resource
> + * controllers.
> + *
> + * Latest version, more details at http://ckrm.sf.net
> + * 
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.

You sure you want the "any later version" on here?  :)

> +int get_exe_path_name(struct task_struct *tsk, char *buf, int buflen)

Path name in what namespace?  I don't think this code is correct, or
valid, or even legal.  Why do you need the full pathname of a program?

> +/*
> + * must be called with cnt_lock of parres held

Please put the proper sparse documention in the function call that
checks for this then.


> + * Caller is responsible for holding any lock to protect the data
> + * structures passed to this function
> + */
> +int
> +set_shares(struct ckrm_shares *new, struct ckrm_shares *cur,
> +	   struct ckrm_shares *par)
> +{
> +	int rc = -EINVAL;
> +	int cur_usage_guar = cur->total_guarantee - cur->unused_guarantee;
> +	int increase_by = new->my_guarantee - cur->my_guarantee;
> +
> +	/* Check total_guarantee for correctness */
> +	if (new->total_guarantee <= CKRM_SHARE_DONTCARE) {
> +		goto set_share_err;
> +	} else if (new->total_guarantee == CKRM_SHARE_UNCHANGED) {
> +		/* do nothing */;
> +	} else if (cur_usage_guar > new->total_guarantee) {
> +		goto set_share_err;
> +	}
> +	/* Check max_limit for correctness */
> +	if (new->max_limit <= CKRM_SHARE_DONTCARE) {
> +		goto set_share_err;
> +	} else if (new->max_limit == CKRM_SHARE_UNCHANGED) {
> +		/* do nothing */;
> +	} else if (cur->cur_max_limit > new->max_limit) {
> +		goto set_share_err;
> +	}
> +	/* Check my_guarantee for correctness */
> +	if (new->my_guarantee == CKRM_SHARE_UNCHANGED) {
> +		/* do nothing */;
> +	} else if (new->my_guarantee == CKRM_SHARE_DONTCARE) {
> +		/* do nothing */;
> +	} else if (par && increase_by > par->unused_guarantee) {
> +		goto set_share_err;
> +	}
> +	/* Check my_limit for correctness */
> +	if (new->my_limit == CKRM_SHARE_UNCHANGED) {
> +		/* do nothing */;
> +	} else if (new->my_limit == CKRM_SHARE_DONTCARE) {
> +		/* do nothing */;
> +	} else if (par && new->my_limit > par->max_limit) {
> +		/* I can't get more limit than my parent's limit */
> +		goto set_share_err;
> +
> +	}
> +	/* make sure guarantee is lesser than limit */
> +	if (new->my_limit == CKRM_SHARE_DONTCARE) {
> +		/* do nothing */;
> +	} else if (new->my_limit == CKRM_SHARE_UNCHANGED) {
> +		if (new->my_guarantee == CKRM_SHARE_DONTCARE) {
> +			/* do nothing */;
> +		} else if (new->my_guarantee == CKRM_SHARE_UNCHANGED) {
> +			/*
> +			 * do nothing; earlier setting would have
> +			 * taken care of it
> +			 */;
> +		} else if (new->my_guarantee > cur->my_limit) {
> +			goto set_share_err;
> +		}
> +	} else {		/* new->my_limit has a valid value */
> +		if (new->my_guarantee == CKRM_SHARE_DONTCARE) {
> +			/* do nothing */;
> +		} else if (new->my_guarantee == CKRM_SHARE_UNCHANGED) {
> +			if (cur->my_guarantee > new->my_limit) {
> +				goto set_share_err;
> +			}
> +		} else if (new->my_guarantee > new->my_limit) {
> +			goto set_share_err;
> +		}
> +	}
> +	if (new->my_guarantee != CKRM_SHARE_UNCHANGED) {
> +		child_guarantee_changed(par, cur->my_guarantee,
> +					new->my_guarantee);
> +		cur->my_guarantee = new->my_guarantee;
> +	}
> +	if (new->my_limit != CKRM_SHARE_UNCHANGED) {
> +		child_maxlimit_changed(par, new->my_limit);
> +		cur->my_limit = new->my_limit;
> +	}
> +	if (new->total_guarantee != CKRM_SHARE_UNCHANGED) {
> +		cur->unused_guarantee = new->total_guarantee - cur_usage_guar;
> +		cur->total_guarantee = new->total_guarantee;
> +	}
> +	if (new->max_limit != CKRM_SHARE_UNCHANGED) {
> +		cur->max_limit = new->max_limit;
> +	}
> +	rc = 0;
> +set_share_err:
> +	return rc;
> +}

There's a whole lot of "nothing" going on in this function.  Care to
optimise it to get rid of those types of checks?  Or are you relying on
the compiler to do it for you?

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] CKRM: 3/10 CKRM:  Core ckrm, rcfs
  2004-11-29 22:00 ` Greg KH
@ 2004-11-29 22:28   ` Roland Dreier
  2004-11-29 22:33     ` Greg KH
  2004-11-30  7:46   ` Arjan van de Ven
  2005-02-24  9:33   ` Gerrit Huizenga
  2 siblings, 1 reply; 13+ messages in thread
From: Roland Dreier @ 2004-11-29 22:28 UTC (permalink / raw)
  To: Greg KH
  Cc: Gerrit Huizenga, linux-kernel, akpm, Rik van Riel, Chris Mason,
	ckrm-tech

    Greg> Ick.  Don't put a _t at the end of a typedef.  Wrong OS
    Greg> style guide.

Just out of curiousity, who wrote the line

	typedef int __bitwise kobject_action_t;

in <linux/kobject_uevent.h>?  From the changelog it almost looks like
you did it ;)

 - Roland

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] CKRM: 3/10 CKRM:  Core ckrm, rcfs
  2004-11-29 22:28   ` Roland Dreier
@ 2004-11-29 22:33     ` Greg KH
  2004-11-29 22:46       ` Randy.Dunlap
  0 siblings, 1 reply; 13+ messages in thread
From: Greg KH @ 2004-11-29 22:33 UTC (permalink / raw)
  To: Roland Dreier
  Cc: Gerrit Huizenga, linux-kernel, akpm, Rik van Riel, Chris Mason,
	ckrm-tech

On Mon, Nov 29, 2004 at 02:28:32PM -0800, Roland Dreier wrote:
>     Greg> Ick.  Don't put a _t at the end of a typedef.  Wrong OS
>     Greg> style guide.
> 
> Just out of curiousity, who wrote the line
> 
> 	typedef int __bitwise kobject_action_t;
> 
> in <linux/kobject_uevent.h>?  From the changelog it almost looks like
> you did it ;)

Yeah, at Linus's insistance.  See his email about the whole __bitwise
stuff for that :(

But I did it for a simple variable type.  Not a structure.

/me justifies it to himself somehow...

greg k-h

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] CKRM: 3/10 CKRM:  Core ckrm, rcfs
  2004-11-29 22:33     ` Greg KH
@ 2004-11-29 22:46       ` Randy.Dunlap
  0 siblings, 0 replies; 13+ messages in thread
From: Randy.Dunlap @ 2004-11-29 22:46 UTC (permalink / raw)
  To: Greg KH
  Cc: Roland Dreier, Gerrit Huizenga, linux-kernel, akpm, Rik van Riel,
	Chris Mason, ckrm-tech

Greg KH wrote:
> On Mon, Nov 29, 2004 at 02:28:32PM -0800, Roland Dreier wrote:
> 
>>    Greg> Ick.  Don't put a _t at the end of a typedef.  Wrong OS
>>    Greg> style guide.
>>
>>Just out of curiousity, who wrote the line
>>
>>	typedef int __bitwise kobject_action_t;
>>
>>in <linux/kobject_uevent.h>?  From the changelog it almost looks like
>>you did it ;)
> 
> 
> Yeah, at Linus's insistance.  See his email about the whole __bitwise
> stuff for that :(
> 
> But I did it for a simple variable type.  Not a structure.
> 
> /me justifies it to himself somehow...

And you are right to do so.  Typedefs for base types make a lot of
sense.  Just don't obfuscate structs with them.

-- 
~Randy

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] CKRM: 3/10 CKRM:  Core ckrm, rcfs
  2004-11-29 22:00 ` Greg KH
  2004-11-29 22:28   ` Roland Dreier
@ 2004-11-30  7:46   ` Arjan van de Ven
  2005-02-24  9:33   ` Gerrit Huizenga
  2 siblings, 0 replies; 13+ messages in thread
From: Arjan van de Ven @ 2004-11-30  7:46 UTC (permalink / raw)
  To: Greg KH
  Cc: Gerrit Huizenga, linux-kernel, akpm, Rik van Riel, Chris Mason,
	ckrm-tech

On Mon, 2004-11-29 at 14:00 -0800, Greg KH wrote:
> > +struct rcfs_functions rcfs_fn;
> > +EXPORT_SYMBOL_GPL(rcfs_fn);
> 
> I don't understand.  Portions of ckrm are released under the LGPL, while
> others are under the GPL?  Why the difference?

the headers are LGPL it seems, the code GPL. Quite a sensible
arrangement imo



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] CKRM: 3/10 CKRM: Core ckrm, rcfs
  2004-11-29 22:00 ` Greg KH
  2004-11-29 22:28   ` Roland Dreier
  2004-11-30  7:46   ` Arjan van de Ven
@ 2005-02-24  9:33   ` Gerrit Huizenga
  2005-02-24 17:52     ` Greg KH
  2 siblings, 1 reply; 13+ messages in thread
From: Gerrit Huizenga @ 2005-02-24  9:33 UTC (permalink / raw)
  To: Greg KH; +Cc: linux-kernel, akpm, Rik van Riel, Chris Mason, ckrm-tech


On Mon, 29 Nov 2004 14:00:47 PST, Greg KH wrote:
> On Mon, Nov 29, 2004 at 10:47:32AM -0800, Gerrit Huizenga wrote:
> > +/* Changes
> > + *
> > + * 12 Nov 2003
> > + *        Created.
> > + * 22 Apr 2004
> > + *        Adopted to classtypes
> > + */
> 
> Ok, I'm not going to say this for every future file... :)
 
Good - I won't have to say "globally fixed" for each one, then.  ;)

> > +#ifdef __KERNEL__
> 
> Not needed.

Ditto.

> > +typedef void *(*ce_classify_fct_t) (enum ckrm_event event, void *obj, ...);
> > +typedef void (*ce_notify_fct_t) (enum ckrm_event event, void *classobj,
> > +				 void *obj);
> 
> Ick.  Don't put a _t at the end of a typedef.  Wrong OS style guide.
 
Fixed.  Although this isn't an OS style guide thing - it is a Posix
driven convention whereby any header file defined in the standard
automatically has _t suffixed variables reserved to the implementation,
e.g. no application is define variables using _t.  This header file isn't
being used by user level applications so it doesn't matter.

> > +typedef struct ckrm_eng_callback {
> 
> no typedef.

Fixed (globally).

> > +	/* general state information */
> > +	int always_callback;	/* set if CE should always be called back 
> > +				   regardless of numclasses */
> > +
> > +	/* callbacks which are called without holding locks */
> > +
> > +	unsigned long c_interest;	/* set of classification events of 
> > +					 * interest to CE 
> > +					 */
> > +
> > +	/* generic classify */
> > +	ce_classify_fct_t classify;
> > +
> > +	/* class added */
> > +	void (*class_add) (const char *name, void *core, int classtype);
> > +
> > +	/* class deleted */
> > +	void (*class_delete) (const char *name, void *core, int classtype);
> > +
> > +	/* callbacks which are called while holding task_lock(tsk) */
> > +	unsigned long n_interest;	/* set of notification events of 
> > +					 *  interest to CE 
> > +					 */
> > +	/* notify on class switch */
> > +	ce_notify_fct_t notify;	
> > +} ckrm_eng_callback_t;
> 
> Especially one that ends in _t again :(
 
Fixed (globally).

> > +struct inode;
> > +struct dentry;
> > +
> > +typedef struct rbce_eng_callback {
> > +	int (*mkdir) (struct inode *, struct dentry *, int);	/* mkdir */
> > +	int (*rmdir) (struct inode *, struct dentry *);		/* rmdir */
> > +	int (*mnt) (void);
> > +	int (*umnt) (void);
> > +} rbce_eng_callback_t;
> 
> Again with the unneeded typedef.  Come on Gerrit, you should know
> better...
 
Sorry, years of implementing Posix conformant OS's and system header
files make this very common for anyone (including several of the
CKRM developers).  Specifically because of user level name space
collision avoidance issues (e.g. think preserving backwards compatibility
for user level apps).  It is the primary mechanism for simplifying the
#ifdef __KERNEL__ crap used in most OS's.

> > +extern int ckrm_register_engine(const char *name, ckrm_eng_callback_t *);
> > +extern int ckrm_unregister_engine(const char *name);
> > +
> > +extern void *ckrm_classobj(char *, int *classtype);
> > +extern int get_exe_path_name(struct task_struct *t, char *filename,
> > +			     int max_size);
> 
> Wasn't this function in some other header file already?
 
And equally unnecessary in the current code.  Fixed.

> > +
> > +extern int rcfs_register_engine(rbce_eng_callback_t *);
> > +extern int rcfs_unregister_engine(rbce_eng_callback_t *);
> > +
> > +extern int ckrm_reclassify(int pid);
> > +
> > +#ifndef _LINUX_CKRM_RC_H
> > +
> > +extern void ckrm_core_grab(void *);
> > +extern void ckrm_core_drop(void *);
> 
> void *?  You can't use a proper type?
 
That was odd - definition was correct, declaration was silly.  Fixed.

> > +typedef struct ckrm_shares {
> > +	int my_guarantee;
> > +	int my_limit;
> > +	int total_guarantee;
> > +	int max_limit;
> > +	int unused_guarantee;	/* not used as parameters */
> > +	int cur_max_limit;	/* not used as parameters */
> > +} ckrm_shares_t;
> 
> Consider this the last of the "no more typedefs except for function
> pointers" reminders for the rest of the code base.
 
Good enough.  All applied.

> > +
> > +#define CKRM_SHARE_UNCHANGED     (-1)	
> > +#define CKRM_SHARE_DONTCARE      (-2)	
> > +#define CKRM_SHARE_DFLT_TOTAL_GUARANTEE (100) 
> > +#define CKRM_SHARE_DFLT_MAX_LIMIT     (100)  
> 
> Trailing whitespace that is a tab, but yet, no tab within the define
> itself.  Odd creature.
 
Yeah, I'm not sure what some of the original authors used for editors
or if they just had big thumbs resting on the space bar.  Fixed.

> > +#define CKRM_CORE_MAGIC		0xBADCAFFE
>
> "Magic" checks should not be needed at all.  Please drop them all.
 
I'd like to leave them in while we are testing with -mm to help
tracking down any potential problems.  Prior to going to Linus',
yes, I think it makes sense to get rid of these.

> > +typedef struct ckrm_hnode {
> > +	struct ckrm_core_class *parent;
> > +	struct list_head siblings;	
> > +	struct list_head children;	
> > +} ckrm_hnode_t;
> 
> I'm going to be over here in the corner, sobbing into my old CodingStyle
> presentations that I know I forced you to sit through a number of
> times... {sniff}
 
I *would* recommend you update the coding style to point out the
Posix convention - it saves people from using __KERNEL__ in some
cases, and I *know* how much you hate that one, too.  ;-)

> > +#define ckrm_get_res_class(rescls, resid, type) \
> > +	((type*) (((resid != -1) && ((rescls) != NULL) \
> > +			   && ((rescls) != (void *)-1)) ? \
> > +	 ((struct ckrm_core_class *)(rescls))->res_class[resid] : NULL))
> 
> What exactly are you trying to do with this macro?  Cast to see if a
> pointer is not -1?  That doesn't sound very safe...

This needs to be fixed and better commented.  Basically, when a task
is exiting, it's class can be set to -1 (-1 in a pointer is, uh, icky).
But when uninitialized, it is set to NULL.  We need to come up with
a better fix for this one.

> > +static inline void ckrm_core_grab(struct ckrm_core_class *core)
> > +{
> > +	if (core)
> > +		atomic_inc(&core->refcnt);
> > +}
> 
> Please just use kref, don't invent your own reference counting.
 
I agree with this but haven't gotten to it yet.  It will take
a bit more transformation since the current code is 0 based references
and kref_t's appear to be initialized to 1.  Also, the interactions with
freeing code will need just a little bit of thought.  So I'm deferring
this for the moment but not dropping it.

> > +/*
> > + * iterate through all associate resource controllers:
> > + * requires following arguments (ckrm_core_class *cls, 
> > + *                               ckrm_res_ctrl   *ctlr,
> > + *                               void            *robj,
> > + *                               int              bmap)
> > + */
> > +
> > +#define forall_class_resobjs(cls,rcbs,robj,bmap)			\
> > +       for ( bmap=((cls->classtype)->bit_res_ctlrs) ;			\
> > +	     ({ int rid; ((rid=ffs(bmap)-1) >= 0) &&			\
> > +	                 (bmap &= ~(1<<rid),				\
> > +				((rcbs=cls->classtype->res_ctlrs[rid])	\
> > +				 && (robj=cls->res_class[rid]))); });	\
> > +           )
> 
> Use kerneldoc comments if you are going to take the time to actually
> document a macro.

Will do a more full sweep for doing kerneldoc since we are updating
documentation in general at the moment.

> > +	ckrm_init();
> 
> Can't just make it an initcall?  What's wrong with the existing 7 levels
> that we have?
 
Okay, the initcalls hurt my head for a moment.  It looks like a quick
converstion to initcalls might change the order of events and I want
to hold off on that until I can make sure it doesn't break other things.
There are several order dependencies between the core, the filesystem
and the (potential) modules for controllers.

> > +#include <linux/config.h>
> > +#include <linux/init.h>
> > +#include <linux/linkage.h>
> > +#include <linux/kernel.h>
> > +#include <linux/errno.h>
> > +#include <asm/uaccess.h>
> > +#include <linux/mm.h>
> > +#include <asm/errno.h>
> > +#include <linux/string.h>
> > +#include <linux/list.h>
> > +#include <linux/spinlock.h>
> > +#include <linux/module.h>
> > +#include <linux/ckrm_rc.h>
> > +#include <linux/rcfs.h>
> > +#include <net/sock.h>
> > +#include <linux/ip.h>
> 
> asm includes after the regular ones please.
 
Done.

> > +rwlock_t ckrm_class_lock = RW_LOCK_UNLOCKED;	/* protects classlists */
> 
> Isn't there a new way to define locks in an initialized state?  Ah yes,
> this should use rwlock_init() instead.
 
Done.

> > +struct rcfs_functions rcfs_fn;
> > +EXPORT_SYMBOL_GPL(rcfs_fn);
> 
> I don't understand.  Portions of ckrm are released under the LGPL, while
> others are under the GPL?  Why the difference?
 
Okay - I *just* realized why you've mentioned the LGPL twice.  Which is
there because some of the headers contain definitions that *could*
be used by user level applications of CKRM.  But the licensing looks
screwed up slightly at the moment.  I'll straighten that up in the
next release.  The goal is to minimize the exports to user land while
still allowing some header files to be shared.  It isn't done correctly
IMHO right now.  Will fix.

> > +/*
> > + * Return TRUE if the given resource is registered.
> > + */
> 
> What is "TRUE"?  True in the kernel sense is usually 0 :)

So if (0) is true.  Wow.  Modified comment to say "non-zero".  I think
every definition of TRUE I could find was "1".  But TRUE is just an
arbitrary definition.  ;-)

> > +struct ckrm_res_ctlr *ckrm_resctlr_lookup(struct ckrm_classtype *clstype,
> > +					  const char *resname)
> > +{
> > +	int resid = -1;
> > +
> > +	if (!clstype || !resname) {
> > +		return NULL;
> > +	}
> 
> No extra {} needed.
 
Fixed.

> > +/* given a classname return the class handle and its classtype*/
> > +void *ckrm_classobj(char *classname, int *classTypeID)
> 
> Why not use proper kerneldoc form of comments if you are going to try to
> document the api?
 
Will do in the future.

> > +	*classTypeID = -1;
> 
> MixedCaseVariablesAreNotTheLinuxWay.
> PleaseReadDocumentationCodingStyleYetAgain.
 
Fixed.

> > +EXPORT_SYMBOL_GPL(is_res_regd);
> 
> Not the nicest global symbol.  Why not put ckrm at the front?
 
Fixed.

> > +/*
> > + * Registering a callback structure by the classification engine.
> > + *
> > + * Returns typeId of class on success -errno for failure.
> > + */
> > +int ckrm_register_engine(const char *typename, ckrm_eng_callback_t * ecbs)
> > +{
> > +	struct ckrm_classtype *ctype;
> > +
> > +	ctype = ckrm_find_classtype_by_name(typename);
> > +	if (ctype == NULL)
> > +		return (-ENOENT);
> > +
> > +	atomic_inc(&ctype->ce_regd);
> > +
> > +	/* another engine registered or trying to register ? */
> > +	if (atomic_read(&ctype->ce_regd) != 1) {
> > +		atomic_dec(&ctype->ce_regd);
> > +		return (-EBUSY);
> > +	}
> 
> Why not just use a lock if you are worried about this?
 
Wanted to avoid holding a lock while crossing the module boundary.
And, this is a very unlikely race.

> > +int ckrm_unregister_engine(const char *typename)
> > +{
> > +	struct ckrm_classtype *ctype;
> > +
> > +	ctype = ckrm_find_classtype_by_name(typename);
> > +	if (ctype == NULL)
> > +		return (-ENOENT);
> 
> return is not a function.
 
*snicker*  yeah.  See that space - must be a paranthetical expression.  ;)
Fixed.

> > +#define CLS_DEBUG(fmt, args...) \
> > +do { /* printk("%s: " fmt, __FUNCTION__ , ## args); */ } while (0)
> 
> Again, use pr_debug() please.
 
Done.

> > +int
> > +ckrm_init_core_class(struct ckrm_classtype *clstype,
> > +		     struct ckrm_core_class *dcore,
> > +		     struct ckrm_core_class *parent, const char *name)
> > +{
> > +	/* TODO:  Should replace name with dentry or add dentry? */
> 
> Shouldn't the TODO's be done by now?
 
This is an evolving project.  We could always do more.  I'd rather
release early and often (which we haven't done well so far) than to
dump a 100% complete project out at the end of development.
 
> > +	int i;
> > +
> > +	/* TODO:  How is this used in initialization? */
> 
> This makes me feel warm and fuzzy...

Looks like a stale comment but I'll verify with the original author
before I yank it.

> > +	CLS_DEBUG("name %s => %p\n", name ? name : "default", dcore);
> > +	if ((dcore != clstype->default_class) && (!ckrm_is_core_valid(parent))){
> > +		printk("error not a valid parent %p\n", parent);
> 
> printk() always needs a KERN_ value or the kernel janitors will come
> running after you with their pitchforks.  You do this in a lot of
> different places, please fix all of them.

Fixed with pr_debug().

> > +	if ((clstype == NULL) || (resid < 0)) {
> > +		return -EINVAL;
> > +	}
> 
> Again, {} not needed here, and in lots of other single statment if
> lines.  Please fix them all.
 
I think I have now.

> > +	/* TODO: probably need to also call deregistration function */
> 
> Well, do you? :)
> 
> > +	/* TODO: Need to call the callbacks of the RCFS client */
> > +	if (rcfs_fn.register_classtype) {
> > +		(*rcfs_fn.register_classtype) (clstype);
> > +		/* No error return for now. */
> 
> Why no error return?
> 
> > +	}
> > +	return tid;
> > +}
> > +
> > +int ckrm_unregister_classtype(struct ckrm_classtype *clstype)
> > +{
> > +	int tid = clstype->typeID;
> > +
> > +	if ((tid < 0) || (tid > CKRM_MAX_CLASSTYPES)
> > +	    || (ckrm_classtypes[tid] != clstype))
> > +		return -EINVAL;
> > +
> > +	if (rcfs_fn.deregister_classtype) {
> > +		(*rcfs_fn.deregister_classtype) (clstype);
> > +		// No error return for now
> 
> Again, why no error?
 
None of the controllers currently generate an error, even though
in the future they might.

> > --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> > +++ linux-2.6.10-rc2/kernel/ckrm/ckrmutils.c	2004-11-19 20:43:43.524211803 -0800
> > @@ -0,0 +1,195 @@
> > +/*
> > + * ckrmutils.c - Utility functions for CKRM
> > + *
> > + * Copyright (C) Chandra Seetharaman,  IBM Corp. 2003
> > + *           (C) Hubertus Franke    ,  IBM Corp. 2004
> > + * 
> > + * Provides simple utility functions for the core module, CE and resource
> > + * controllers.
> > + *
> > + * Latest version, more details at http://ckrm.sf.net
> > + * 
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License as published by
> > + * the Free Software Foundation; either version 2 of the License, or
> > + * (at your option) any later version.
> 
> You sure you want the "any later version" on here?  :)
 
Nope.  Copied from somewhere.  Fixed.

> > +int get_exe_path_name(struct task_struct *tsk, char *buf, int buflen)
> 
> Path name in what namespace?  I don't think this code is correct, or
> valid, or even legal.  Why do you need the full pathname of a program?
 
Gone.  Kabut.  Removed.  Unnneeded.

> > +/*
> > + * must be called with cnt_lock of parres held
> 
> Please put the proper sparse documention in the function call that
> checks for this then.
 
Will do.  Not in the next patch set but full sparse work coming soon.

> > + * Caller is responsible for holding any lock to protect the data
> > + * structures passed to this function
> > + */
> > +int
> > +set_shares(struct ckrm_shares *new, struct ckrm_shares *cur,
> > +	   struct ckrm_shares *par)
> > +{
> > +	int rc = -EINVAL;
> > +	int cur_usage_guar = cur->total_guarantee - cur->unused_guarantee;
> > +	int increase_by = new->my_guarantee - cur->my_guarantee;
> > +
> > +	/* Check total_guarantee for correctness */
> > +	if (new->total_guarantee <= CKRM_SHARE_DONTCARE) {
> > +		goto set_share_err;
> > +	} else if (new->total_guarantee == CKRM_SHARE_UNCHANGED) {
> > +		/* do nothing */;
> > +	} else if (cur_usage_guar > new->total_guarantee) {
> > +		goto set_share_err;
> > +	}
> > +	/* Check max_limit for correctness */
> > +	if (new->max_limit <= CKRM_SHARE_DONTCARE) {
> > +		goto set_share_err;
> > +	} else if (new->max_limit == CKRM_SHARE_UNCHANGED) {
> > +		/* do nothing */;
> > +	} else if (cur->cur_max_limit > new->max_limit) {
> > +		goto set_share_err;
> > +	}
> > +	/* Check my_guarantee for correctness */
> > +	if (new->my_guarantee == CKRM_SHARE_UNCHANGED) {
> > +		/* do nothing */;
> > +	} else if (new->my_guarantee == CKRM_SHARE_DONTCARE) {
> > +		/* do nothing */;
> > +	} else if (par && increase_by > par->unused_guarantee) {
> > +		goto set_share_err;
> > +	}
> > +	/* Check my_limit for correctness */
> > +	if (new->my_limit == CKRM_SHARE_UNCHANGED) {
> > +		/* do nothing */;
> > +	} else if (new->my_limit == CKRM_SHARE_DONTCARE) {
> > +		/* do nothing */;
> > +	} else if (par && new->my_limit > par->max_limit) {
> > +		/* I can't get more limit than my parent's limit */
> > +		goto set_share_err;
> > +
> > +	}
> > +	/* make sure guarantee is lesser than limit */
> > +	if (new->my_limit == CKRM_SHARE_DONTCARE) {
> > +		/* do nothing */;
> > +	} else if (new->my_limit == CKRM_SHARE_UNCHANGED) {
> > +		if (new->my_guarantee == CKRM_SHARE_DONTCARE) {
> > +			/* do nothing */;
> > +		} else if (new->my_guarantee == CKRM_SHARE_UNCHANGED) {
> > +			/*
> > +			 * do nothing; earlier setting would have
> > +			 * taken care of it
> > +			 */;
> > +		} else if (new->my_guarantee > cur->my_limit) {
> > +			goto set_share_err;
> > +		}
> > +	} else {		/* new->my_limit has a valid value */
> > +		if (new->my_guarantee == CKRM_SHARE_DONTCARE) {
> > +			/* do nothing */;
> > +		} else if (new->my_guarantee == CKRM_SHARE_UNCHANGED) {
> > +			if (cur->my_guarantee > new->my_limit) {
> > +				goto set_share_err;
> > +			}
> > +		} else if (new->my_guarantee > new->my_limit) {
> > +			goto set_share_err;
> > +		}
> > +	}
> > +	if (new->my_guarantee != CKRM_SHARE_UNCHANGED) {
> > +		child_guarantee_changed(par, cur->my_guarantee,
> > +					new->my_guarantee);
> > +		cur->my_guarantee = new->my_guarantee;
> > +	}
> > +	if (new->my_limit != CKRM_SHARE_UNCHANGED) {
> > +		child_maxlimit_changed(par, new->my_limit);
> > +		cur->my_limit = new->my_limit;
> > +	}
> > +	if (new->total_guarantee != CKRM_SHARE_UNCHANGED) {
> > +		cur->unused_guarantee = new->total_guarantee - cur_usage_guar;
> > +		cur->total_guarantee = new->total_guarantee;
> > +	}
> > +	if (new->max_limit != CKRM_SHARE_UNCHANGED) {
> > +		cur->max_limit = new->max_limit;
> > +	}
> > +	rc = 0;
> > +set_share_err:
> > +	return rc;
> > +}
> 
> There's a whole lot of "nothing" going on in this function.  Care to
> optimise it to get rid of those types of checks?  Or are you relying on
> the compiler to do it for you?
 
Most of this should be pretty straightforward for optimization but
yeah, some code wrangling could make this both easier to read and
more likely easier to optimize.  Probably will convert this to
a bunch of shorter, more focused inline functions.  Will save this
for a future release, though.

thanks!

gerrit

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] CKRM: 3/10 CKRM: Core ckrm, rcfs
  2005-02-24  9:33   ` Gerrit Huizenga
@ 2005-02-24 17:52     ` Greg KH
  2005-02-24 20:54       ` Gerrit Huizenga
  0 siblings, 1 reply; 13+ messages in thread
From: Greg KH @ 2005-02-24 17:52 UTC (permalink / raw)
  To: Gerrit Huizenga; +Cc: linux-kernel, akpm, Rik van Riel, Chris Mason, ckrm-tech

On Thu, Feb 24, 2005 at 01:33:12AM -0800, Gerrit Huizenga wrote:
> On Mon, 29 Nov 2004 14:00:47 PST, Greg KH wrote:
> > On Mon, Nov 29, 2004 at 10:47:32AM -0800, Gerrit Huizenga wrote:
> > > +typedef void *(*ce_classify_fct_t) (enum ckrm_event event, void *obj, ...);
> > > +typedef void (*ce_notify_fct_t) (enum ckrm_event event, void *classobj,
> > > +				 void *obj);
> > 
> > Ick.  Don't put a _t at the end of a typedef.  Wrong OS style guide.
>  
> Fixed.  Although this isn't an OS style guide thing - it is a Posix
> driven convention whereby any header file defined in the standard
> automatically has _t suffixed variables reserved to the implementation,
> e.g. no application is define variables using _t.  This header file isn't
> being used by user level applications so it doesn't matter.

But Linux kernel internals are not driven by Posix "conventions", hence,
my objection.

> > Again with the unneeded typedef.  Come on Gerrit, you should know
> > better...
>  
> Sorry, years of implementing Posix conformant OS's and system header
> files make this very common for anyone (including several of the
> CKRM developers).  Specifically because of user level name space
> collision avoidance issues (e.g. think preserving backwards compatibility
> for user level apps).  It is the primary mechanism for simplifying the
> #ifdef __KERNEL__ crap used in most OS's.

If you are going to write Linux kernel code, use the proper style rules.
No matter how many years working on other oses, it doesn't matter, you
know better than to try to bring up that kind of objection...

thanks,

greg k-h
> > > +#define ckrm_get_res_class(rescls, resid, type) \
> > > +	((type*) (((resid != -1) && ((rescls) != NULL) \
> > > +			   && ((rescls) != (void *)-1)) ? \
> > > +	 ((struct ckrm_core_class *)(rescls))->res_class[resid] : NULL))
> > 
> > What exactly are you trying to do with this macro?  Cast to see if a
> > pointer is not -1?  That doesn't sound very safe...
> 
> This needs to be fixed and better commented.  Basically, when a task
> is exiting, it's class can be set to -1 (-1 in a pointer is, uh, icky).
> But when uninitialized, it is set to NULL.  We need to come up with
> a better fix for this one.

Setting a pointer to -1 is, uh, wrong.  Please fix this, as it's just
broken.

> > > +static inline void ckrm_core_grab(struct ckrm_core_class *core)
> > > +{
> > > +	if (core)
> > > +		atomic_inc(&core->refcnt);
> > > +}
> > 
> > Please just use kref, don't invent your own reference counting.
>  
> I agree with this but haven't gotten to it yet.  It will take
> a bit more transformation since the current code is 0 based references
> and kref_t's appear to be initialized to 1.  Also, the interactions with
> freeing code will need just a little bit of thought.  So I'm deferring
> this for the moment but not dropping it.

It doesn't matter if kref (there is no kref_t, I don't know where you
got that from) is initialized to 42.  The whole point is the reference
counting is handled properly for you, and you don't care, or know, what
the actual count is, or how it works underneath.  Just that it works
properly.

Please fix the code to use kref, that's what it is there for (see my
section in the OLS CodingStyle paper about using core kernel functions,
and not reinventing your own...)

> > > +	ckrm_init();
> > 
> > Can't just make it an initcall?  What's wrong with the existing 7 levels
> > that we have?
>  
> Okay, the initcalls hurt my head for a moment.  It looks like a quick
> converstion to initcalls might change the order of events and I want
> to hold off on that until I can make sure it doesn't break other things.
> There are several order dependencies between the core, the filesystem
> and the (potential) modules for controllers.

Then just pick the proper init call level.  That's what they are there
for.  Don't use this kind of patch for your subsystem, unless there is
no other way to get it initialized properly.  And if so, please let us
know why not.

> > > +/*
> > > + * Return TRUE if the given resource is registered.
> > > + */
> > 
> > What is "TRUE"?  True in the kernel sense is usually 0 :)
> 
> So if (0) is true.  Wow.  Modified comment to say "non-zero".  I think
> every definition of TRUE I could find was "1".  But TRUE is just an
> arbitrary definition.  ;-)

No, return 0 if the given resource is registered.  That's the kernel
calling convention, please follow it.


> > > +/*
> > > + * Registering a callback structure by the classification engine.
> > > + *
> > > + * Returns typeId of class on success -errno for failure.
> > > + */
> > > +int ckrm_register_engine(const char *typename, ckrm_eng_callback_t * ecbs)
> > > +{
> > > +	struct ckrm_classtype *ctype;
> > > +
> > > +	ctype = ckrm_find_classtype_by_name(typename);
> > > +	if (ctype == NULL)
> > > +		return (-ENOENT);
> > > +
> > > +	atomic_inc(&ctype->ce_regd);
> > > +
> > > +	/* another engine registered or trying to register ? */
> > > +	if (atomic_read(&ctype->ce_regd) != 1) {
> > > +		atomic_dec(&ctype->ce_regd);
> > > +		return (-EBUSY);
> > > +	}
> > 
> > Why not just use a lock if you are worried about this?
>  
> Wanted to avoid holding a lock while crossing the module boundary.
> And, this is a very unlikely race.

Crossing what module boundry?  This is at startup time, and you don't
need to worry about lock speeds, right?  Please don't try to reinvent a
lock with atomic values for something like this.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] CKRM: 3/10 CKRM: Core ckrm, rcfs
  2005-02-24 17:52     ` Greg KH
@ 2005-02-24 20:54       ` Gerrit Huizenga
  2005-02-24 21:11         ` Greg KH
  0 siblings, 1 reply; 13+ messages in thread
From: Gerrit Huizenga @ 2005-02-24 20:54 UTC (permalink / raw)
  To: Greg KH; +Cc: linux-kernel, akpm, Rik van Riel, Chris Mason, ckrm-tech


On Thu, 24 Feb 2005 09:52:23 PST, Greg KH wrote:
> On Thu, Feb 24, 2005 at 01:33:12AM -0800, Gerrit Huizenga wrote:
> > On Mon, 29 Nov 2004 14:00:47 PST, Greg KH wrote:
> > > On Mon, Nov 29, 2004 at 10:47:32AM -0800, Gerrit Huizenga wrote:
> > > > +typedef void *(*ce_classify_fct_t) (enum ckrm_event event, void *obj, ...);
> > > > +typedef void (*ce_notify_fct_t) (enum ckrm_event event, void *classobj,
> > > > +				 void *obj);
> > > 
> > > Ick.  Don't put a _t at the end of a typedef.  Wrong OS style guide.
> >  
> > Fixed.  Although this isn't an OS style guide thing - it is a Posix
> > driven convention whereby any header file defined in the standard
> > automatically has _t suffixed variables reserved to the implementation,
> > e.g. no application is define variables using _t.  This header file isn't
> > being used by user level applications so it doesn't matter.
> 
> But Linux kernel internals are not driven by Posix "conventions", hence,
> my objection.
 
So what is the recommended way of making header files safe for both
kernel and user level consumption when a header file contains
structure definitions suitable for user/kernel communication?

Currently, I don't like the way the current CKRM code mixes kernel
and user content, beyond just the things that are user level accessible.

However, it was pointed out to me that some of the CKRM files define
things which are intended to be part of the interface between user and
kernel.  That is also where some header files are defined as LGPL.

Now, I believe the contents of those header files should be clearly
separated into user/kernel API/structure files and kernel-only headers.

How do you recommend that that usually be done without polluting the
applications C namespace?  This gets right back to the problem with
replicating everything for glibc under a new license, which is really
quite a crock but just the way things are today.  I'd rather start out
with something involving a bit less redundent code.

> > > Again with the unneeded typedef.  Come on Gerrit, you should know
> > > better...
> >  
> > Sorry, years of implementing Posix conformant OS's and system header
> > files make this very common for anyone (including several of the
> > CKRM developers).  Specifically because of user level name space
> > collision avoidance issues (e.g. think preserving backwards compatibility
> > for user level apps).  It is the primary mechanism for simplifying the
> > #ifdef __KERNEL__ crap used in most OS's.
> 
> If you are going to write Linux kernel code, use the proper style rules.
> No matter how many years working on other oses, it doesn't matter, you
> know better than to try to bring up that kind of objection...

The question above still stands.  Linus has mentioned the value of
__KERNEL__ in the past to help avoid the application name space
pollution issue as well, but _t also is an internationally accepted
convention among application programmers and system providers.  I'm
not as convinced that this is a case where Linux being different adds
any value to anyone, and actually makes it tougher to define header
files which can preserve an application/kernel API.

I'm trying to figure out the "right way" of solving the issue of
allowing user apps that happen to be mostly Posix conformant use
CKRM without polluting their namespace.  Seperate headers will do that,
at the minor annoyance of a proliferation of header files.

> > > > +#define ckrm_get_res_class(rescls, resid, type) \
> > > > +	((type*) (((resid != -1) && ((rescls) != NULL) \
> > > > +			   && ((rescls) != (void *)-1)) ? \
> > > > +	 ((struct ckrm_core_class *)(rescls))->res_class[resid] : NULL))
> > > 
> > > What exactly are you trying to do with this macro?  Cast to see if a
> > > pointer is not -1?  That doesn't sound very safe...
> > 
> > This needs to be fixed and better commented.  Basically, when a task
> > is exiting, it's class can be set to -1 (-1 in a pointer is, uh, icky).
> > But when uninitialized, it is set to NULL.  We need to come up with
> > a better fix for this one.
> 
> Setting a pointer to -1 is, uh, wrong.  Please fix this, as it's just
> broken.
 
Yes - I have the patch at hand to fix this, just need to merge it in.
It will be included in the next release.

> > > > +static inline void ckrm_core_grab(struct ckrm_core_class *core)
> > > > +{
> > > > +	if (core)
> > > > +		atomic_inc(&core->refcnt);
> > > > +}
> > > 
> > > Please just use kref, don't invent your own reference counting.
> >  
> > I agree with this but haven't gotten to it yet.  It will take
> > a bit more transformation since the current code is 0 based references
> > and kref_t's appear to be initialized to 1.  Also, the interactions with
> > freeing code will need just a little bit of thought.  So I'm deferring
> > this for the moment but not dropping it.
> 
> It doesn't matter if kref (there is no kref_t, I don't know where you
> got that from) is initialized to 42.  The whole point is the reference
> counting is handled properly for you, and you don't care, or know, what
> the actual count is, or how it works underneath.  Just that it works
> properly.

kref_init sets the count to 1 - we allocate the reference during init
code and increment later.  This is fixable, I just didn't get to it on
this round.

> Please fix the code to use kref, that's what it is there for (see my
> section in the OLS CodingStyle paper about using core kernel functions,
> and not reinventing your own...)
 
It shall be done.  I hate the department of redundency department as
much as anyone else.  ;)

> > > > +	ckrm_init();
> > > 
> > > Can't just make it an initcall?  What's wrong with the existing 7 levels
> > > that we have?
> >  
> > Okay, the initcalls hurt my head for a moment.  It looks like a quick
> > converstion to initcalls might change the order of events and I want
> > to hold off on that until I can make sure it doesn't break other things.
> > There are several order dependencies between the core, the filesystem
> > and the (potential) modules for controllers.
> 
> Then just pick the proper init call level.  That's what they are there
> for.  Don't use this kind of patch for your subsystem, unless there is
> no other way to get it initialized properly.  And if so, please let us
> know why not.

Yes, will do, just need to noodle on this one a bit.

> > > > +/*
> > > > + * Return TRUE if the given resource is registered.
> > > > + */
> > > 
> > > What is "TRUE"?  True in the kernel sense is usually 0 :)
> > 
> > So if (0) is true.  Wow.  Modified comment to say "non-zero".  I think
> > every definition of TRUE I could find was "1".  But TRUE is just an
> > arbitrary definition.  ;-)
> 
> No, return 0 if the given resource is registered.  That's the kernel
> calling convention, please follow it.
 
Hmm.  Haven't found a caller for this one.  It may fall out of this
patch and show up with the patch that actually uses it.
 
> > > > +/*
> > > > + * Registering a callback structure by the classification engine.
> > > > + *
> > > > + * Returns typeId of class on success -errno for failure.
> > > > + */
> > > > +int ckrm_register_engine(const char *typename, ckrm_eng_callback_t * ecbs)
> > > > +{
> > > > +	struct ckrm_classtype *ctype;
> > > > +
> > > > +	ctype = ckrm_find_classtype_by_name(typename);
> > > > +	if (ctype == NULL)
> > > > +		return (-ENOENT);
> > > > +
> > > > +	atomic_inc(&ctype->ce_regd);
> > > > +
> > > > +	/* another engine registered or trying to register ? */
> > > > +	if (atomic_read(&ctype->ce_regd) != 1) {
> > > > +		atomic_dec(&ctype->ce_regd);
> > > > +		return (-EBUSY);
> > > > +	}
> > > 
> > > Why not just use a lock if you are worried about this?
> >  
> > Wanted to avoid holding a lock while crossing the module boundary.
> > And, this is a very unlikely race.
> 
> Crossing what module boundry?  This is at startup time, and you don't
> need to worry about lock speeds, right?  Please don't try to reinvent a
> lock with atomic values for something like this.

The classification engines can be loadable modules.

gerrit

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] CKRM: 3/10 CKRM: Core ckrm, rcfs
  2005-02-24 20:54       ` Gerrit Huizenga
@ 2005-02-24 21:11         ` Greg KH
  2005-02-24 21:30           ` Gerrit Huizenga
  0 siblings, 1 reply; 13+ messages in thread
From: Greg KH @ 2005-02-24 21:11 UTC (permalink / raw)
  To: Gerrit Huizenga; +Cc: linux-kernel, akpm, Rik van Riel, Chris Mason, ckrm-tech

On Thu, Feb 24, 2005 at 12:54:17PM -0800, Gerrit Huizenga wrote:
> On Thu, 24 Feb 2005 09:52:23 PST, Greg KH wrote:
> > On Thu, Feb 24, 2005 at 01:33:12AM -0800, Gerrit Huizenga wrote:
> > > On Mon, 29 Nov 2004 14:00:47 PST, Greg KH wrote:
> > > > On Mon, Nov 29, 2004 at 10:47:32AM -0800, Gerrit Huizenga wrote:
> > > > > +typedef void *(*ce_classify_fct_t) (enum ckrm_event event, void *obj, ...);
> > > > > +typedef void (*ce_notify_fct_t) (enum ckrm_event event, void *classobj,
> > > > > +				 void *obj);
> > > > 
> > > > Ick.  Don't put a _t at the end of a typedef.  Wrong OS style guide.
> > >  
> > > Fixed.  Although this isn't an OS style guide thing - it is a Posix
> > > driven convention whereby any header file defined in the standard
> > > automatically has _t suffixed variables reserved to the implementation,
> > > e.g. no application is define variables using _t.  This header file isn't
> > > being used by user level applications so it doesn't matter.
> > 
> > But Linux kernel internals are not driven by Posix "conventions", hence,
> > my objection.
>  
> So what is the recommended way of making header files safe for both
> kernel and user level consumption when a header file contains
> structure definitions suitable for user/kernel communication?

Right now the way is, "Don't do it."  Write separate header files for
userspace.  See the lkml archives for details as to what the proposed
way to do this is, but I don't think anyone has started working on it
yet.

Either way, the kernel types better not be typedefs, end of story.

> > > > > +#define ckrm_get_res_class(rescls, resid, type) \
> > > > > +	((type*) (((resid != -1) && ((rescls) != NULL) \
> > > > > +			   && ((rescls) != (void *)-1)) ? \
> > > > > +	 ((struct ckrm_core_class *)(rescls))->res_class[resid] : NULL))
> > > > 
> > > > What exactly are you trying to do with this macro?  Cast to see if a
> > > > pointer is not -1?  That doesn't sound very safe...
> > > 
> > > This needs to be fixed and better commented.  Basically, when a task
> > > is exiting, it's class can be set to -1 (-1 in a pointer is, uh, icky).
> > > But when uninitialized, it is set to NULL.  We need to come up with
> > > a better fix for this one.
> > 
> > Setting a pointer to -1 is, uh, wrong.  Please fix this, as it's just
> > broken.
>  
> Yes - I have the patch at hand to fix this, just need to merge it in.
> It will be included in the next release.

Just curious, what is your level of involvement in this project?  Are
you just merging other developer's patches, or are you writing any of
the changes yourself?  Isn't a maintainer of a kernel subsystem supposed
to be one of the primary developers?

> > > > > +/*
> > > > > + * Registering a callback structure by the classification engine.
> > > > > + *
> > > > > + * Returns typeId of class on success -errno for failure.
> > > > > + */
> > > > > +int ckrm_register_engine(const char *typename, ckrm_eng_callback_t * ecbs)
> > > > > +{
> > > > > +	struct ckrm_classtype *ctype;
> > > > > +
> > > > > +	ctype = ckrm_find_classtype_by_name(typename);
> > > > > +	if (ctype == NULL)
> > > > > +		return (-ENOENT);
> > > > > +
> > > > > +	atomic_inc(&ctype->ce_regd);
> > > > > +
> > > > > +	/* another engine registered or trying to register ? */
> > > > > +	if (atomic_read(&ctype->ce_regd) != 1) {
> > > > > +		atomic_dec(&ctype->ce_regd);
> > > > > +		return (-EBUSY);
> > > > > +	}
> > > > 
> > > > Why not just use a lock if you are worried about this?
> > >  
> > > Wanted to avoid holding a lock while crossing the module boundary.
> > > And, this is a very unlikely race.
> > 
> > Crossing what module boundry?  This is at startup time, and you don't
> > need to worry about lock speeds, right?  Please don't try to reinvent a
> > lock with atomic values for something like this.
> 
> The classification engines can be loadable modules.

Then you have a race condition in the above code that needs to be fixed.
And no, using an atomic_t is not the solution.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] CKRM: 3/10 CKRM: Core ckrm, rcfs
  2005-02-24 21:11         ` Greg KH
@ 2005-02-24 21:30           ` Gerrit Huizenga
  2005-02-24 22:53             ` Chandra Seetharaman
  0 siblings, 1 reply; 13+ messages in thread
From: Gerrit Huizenga @ 2005-02-24 21:30 UTC (permalink / raw)
  To: Greg KH
  Cc: linux-kernel, akpm, Rik van Riel, Chris Mason, ckrm-tech, sekharan


On Thu, 24 Feb 2005 13:11:08 PST, Greg KH wrote:
> On Thu, Feb 24, 2005 at 12:54:17PM -0800, Gerrit Huizenga wrote:
> > On Thu, 24 Feb 2005 09:52:23 PST, Greg KH wrote:
> > > On Thu, Feb 24, 2005 at 01:33:12AM -0800, Gerrit Huizenga wrote:
> > > > On Mon, 29 Nov 2004 14:00:47 PST, Greg KH wrote:
> > > > > On Mon, Nov 29, 2004 at 10:47:32AM -0800, Gerrit Huizenga wrote:
> > > > > > +typedef void *(*ce_classify_fct_t) (enum ckrm_event event, void *obj, ...);
> > > > > > +typedef void (*ce_notify_fct_t) (enum ckrm_event event, void *classobj,
> > > > > > +				 void *obj);
> > > > > 
> > > > > Ick.  Don't put a _t at the end of a typedef.  Wrong OS style guide.
> > > >  
> > > > Fixed.  Although this isn't an OS style guide thing - it is a Posix
> > > > driven convention whereby any header file defined in the standard
> > > > automatically has _t suffixed variables reserved to the implementation,
> > > > e.g. no application is define variables using _t.  This header file isn't
> > > > being used by user level applications so it doesn't matter.
> > > 
> > > But Linux kernel internals are not driven by Posix "conventions", hence,
> > > my objection.
> >  
> > So what is the recommended way of making header files safe for both
> > kernel and user level consumption when a header file contains
> > structure definitions suitable for user/kernel communication?
> 
> Right now the way is, "Don't do it."  Write separate header files for
> userspace.  See the lkml archives for details as to what the proposed
> way to do this is, but I don't think anyone has started working on it
> yet.
 
Yeah - I've seen that.  Doesn't help for new projects yet since the
approach is not well fleshed out yet.

> > > > > > +#define ckrm_get_res_class(rescls, resid, type) \
> > > > > > +	((type*) (((resid != -1) && ((rescls) != NULL) \
> > > > > > +			   && ((rescls) != (void *)-1)) ? \
> > > > > > +	 ((struct ckrm_core_class *)(rescls))->res_class[resid] : NULL))
> > > > > 
> > > > > What exactly are you trying to do with this macro?  Cast to see if a
> > > > > pointer is not -1?  That doesn't sound very safe...
> > > > 
> > > > This needs to be fixed and better commented.  Basically, when a task
> > > > is exiting, it's class can be set to -1 (-1 in a pointer is, uh, icky).
> > > > But when uninitialized, it is set to NULL.  We need to come up with
> > > > a better fix for this one.
> > > 
> > > Setting a pointer to -1 is, uh, wrong.  Please fix this, as it's just
> > > broken.
> >  
> > Yes - I have the patch at hand to fix this, just need to merge it in.
> > It will be included in the next release.
> 
> Just curious, what is your level of involvement in this project?  Are
> you just merging other developer's patches, or are you writing any of
> the changes yourself?  Isn't a maintainer of a kernel subsystem supposed
> to be one of the primary developers?
 
I'm the person who will ensure that it is maintained.  There are quite
a few developers who have been involved over and those have changed a
bit over time and will continue to change.  However, I'll be sticking
around to make sure that the kernel side is cleaned up and remains
maintainable.  Some of the areas have more specific owners but some
also are supporting distros, research activities, etc.

Of the cleanups, I've done most of them myself but have had some help
and will continue to have help from several of the authors as we carry
forward.  I also did a fair share of cleanups prior to the first posting;
I'm not sure what you would have thought of the first few iterations of
the code.  ;-)

> > > > > > +/*
> > > > > > + * Registering a callback structure by the classification engine.
> > > > > > + *
> > > > > > + * Returns typeId of class on success -errno for failure.
> > > > > > + */
> > > > > > +int ckrm_register_engine(const char *typename, ckrm_eng_callback_t * ecbs)
> > > > > > +{
> > > > > > +	struct ckrm_classtype *ctype;
> > > > > > +
> > > > > > +	ctype = ckrm_find_classtype_by_name(typename);
> > > > > > +	if (ctype == NULL)
> > > > > > +		return (-ENOENT);
> > > > > > +
> > > > > > +	atomic_inc(&ctype->ce_regd);
> > > > > > +
> > > > > > +	/* another engine registered or trying to register ? */
> > > > > > +	if (atomic_read(&ctype->ce_regd) != 1) {
> > > > > > +		atomic_dec(&ctype->ce_regd);
> > > > > > +		return (-EBUSY);
> > > > > > +	}
> > > > > 
> > > > > Why not just use a lock if you are worried about this?
> > > >  
> > > > Wanted to avoid holding a lock while crossing the module boundary.
> > > > And, this is a very unlikely race.
> > > 
> > > Crossing what module boundry?  This is at startup time, and you don't
> > > need to worry about lock speeds, right?  Please don't try to reinvent a
> > > lock with atomic values for something like this.
> > 
> > The classification engines can be loadable modules.
> 
> Then you have a race condition in the above code that needs to be fixed.
> And no, using an atomic_t is not the solution.

Why not?  This simply gives an EBUSY if someone tries to load multiple
classification engines in parallel - one wins, one loses.  I'm not sure
if there is a higher level mutex on module loading that might even prevent
this race although I wouldn't be surprised if there were.  If there is,
I think this code might be removable.  If there isn't, it provides a
first-one-wins approach.

Hmm.  Oh, partial answer to my own question...  I think in theory you
could have a classification engine compiled into the kernel and another
one built as a module.  But no, you still wouldn't have a race like this.
All built-in CE's would be executed linearly, only module loads could
potentially race.

Chandra, do you know if this is the only race this is protecting against?
It is the only one I see at the moment.

gerrit

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] CKRM: 3/10 CKRM: Core ckrm, rcfs
  2005-02-24 21:30           ` Gerrit Huizenga
@ 2005-02-24 22:53             ` Chandra Seetharaman
  0 siblings, 0 replies; 13+ messages in thread
From: Chandra Seetharaman @ 2005-02-24 22:53 UTC (permalink / raw)
  To: Gerrit Huizenga
  Cc: Greg KH, linux-kernel, akpm, Rik van Riel, Chris Mason, ckrm-tech

On Thu, Feb 24, 2005 at 01:30:10PM -0800, Gerrit Huizenga wrote:
> > > 
> > > The classification engines can be loadable modules.
> > 
> > Then you have a race condition in the above code that needs to be fixed.
> > And no, using an atomic_t is not the solution.
> 
> Why not?  This simply gives an EBUSY if someone tries to load multiple
> classification engines in parallel - one wins, one loses.  I'm not sure
> if there is a higher level mutex on module loading that might even prevent
> this race although I wouldn't be surprised if there were.  If there is,
> I think this code might be removable.  If there isn't, it provides a
> first-one-wins approach.
> 
> Hmm.  Oh, partial answer to my own question...  I think in theory you
> could have a classification engine compiled into the kernel and another
> one built as a module.  But no, you still wouldn't have a race like this.
> All built-in CE's would be executed linearly, only module loads could
> potentially race.
> 
> Chandra, do you know if this is the only race this is protecting against?
> It is the only one I see at the moment.

We want to have only one CE active at any point of time, for that we need
to have some internal variable. using atomic helps us get that along with
protecting against a very unlikely race condition.

> 
> gerrit

-- 

----------------------------------------------------------------------
    Chandra Seetharaman               | Be careful what you choose....
              - sekharan@us.ibm.com   |      .......you may get it.
----------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] CKRM: 3/10 CKRM: Core ckrm, rcfs
       [not found] <200411292000.iATK0uuD003049@falcon10.austin.ibm.com>
@ 2004-11-29 20:14 ` Gerrit Huizenga
  0 siblings, 0 replies; 13+ messages in thread
From: Gerrit Huizenga @ 2004-11-29 20:14 UTC (permalink / raw)
  To: Doug Maxey; +Cc: linux-kernel, akpm, Rik van Riel, Chris Mason, ckrm-tech

On Mon, 29 Nov 2004 14:00:56 CST, Doug Maxey wrote:
> 
> On Mon, 29 Nov 2004 10:47:32 PST, Gerrit Huizenga wrote:
> >+extern struct rcfs_mfdesc *genmfdesc[]; 
> >+ 
> >+inline struct rcfs_inode_info *RCFS_I(struct inode *inode); <<<<<<<<
> >+ 
> >+int rcfs_empty(struct dentry *); 
> >+struct inode *rcfs_get_inode(struct super_block *, int, dev_t); 
> 
> You have this as both inline and exported.  The usage implies that it is 
> indeed exported, so inline should not be used in the decl.
> 
> ++doug

Good catch - will fix - thanks!

gerrit

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2005-02-24 23:09 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-11-29 18:47 [PATCH] CKRM: 3/10 CKRM: Core ckrm, rcfs Gerrit Huizenga
2004-11-29 22:00 ` Greg KH
2004-11-29 22:28   ` Roland Dreier
2004-11-29 22:33     ` Greg KH
2004-11-29 22:46       ` Randy.Dunlap
2004-11-30  7:46   ` Arjan van de Ven
2005-02-24  9:33   ` Gerrit Huizenga
2005-02-24 17:52     ` Greg KH
2005-02-24 20:54       ` Gerrit Huizenga
2005-02-24 21:11         ` Greg KH
2005-02-24 21:30           ` Gerrit Huizenga
2005-02-24 22:53             ` Chandra Seetharaman
     [not found] <200411292000.iATK0uuD003049@falcon10.austin.ibm.com>
2004-11-29 20:14 ` Gerrit Huizenga

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).