All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]
@ 2006-07-06  9:28 Shailabh Nagar
  2006-07-06  9:56 ` Andrew Morton
  0 siblings, 1 reply; 14+ messages in thread
From: Shailabh Nagar @ 2006-07-06  9:28 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Jay Lan, Paul Jackson, Valdis.Kletnieks, Balbir Singh,
	Chris Sturtivant, linux-kernel, Jamal, netdev

On systems with a large number of cpus, with even a modest rate of
tasks exiting per cpu, the volume of taskstats data sent on thread exit
can overflow a userspace listener's buffers.

One approach to avoiding overflow is to allow listeners to get data for
a limited and specific set of cpus. By scaling the number of listeners
and/or the cpus they monitor, userspace can handle the statistical data
overload more gracefully.

In this patch, each listener registers to listen to a specific set of
cpus by specifying a cpumask.  The interest is recorded per-cpu. When
a task exits on a cpu, its taskstats data is unicast to each listener
interested in that cpu.

Thanks to Andrew Morton for pointing out the various scalability and
general concerns of previous attempts and for suggesting this design.

Signed-Off-By: Shailabh Nagar <nagar@watson.ibm.com>

---

Fixes comments by akpm:
- uninline taskstats_exit_alloc
- remove all preempt_disable
- combine per-cpu listener list and its locking into a single struct
- check cpumask as a whole against cpu_possible_map
- missing kfree(struct listener)

Suggested by Paul Jackson
- limit on length of cpumask passed in as a cpulist

Addresses the problem of "close fd without deregistration" though it
can be done better.


  include/linux/taskstats.h      |    4 -
  include/linux/taskstats_kern.h |   22 +----
  kernel/exit.c                  |    5 -
  kernel/taskstats.c             |  163 ++++++++++++++++++++++++++++++++++++++---
  4 files changed, 163 insertions(+), 31 deletions(-)

Index: linux-2.6.17-mm3equiv/include/linux/taskstats.h
===================================================================
--- linux-2.6.17-mm3equiv.orig/include/linux/taskstats.h	2006-06-30 19:03:40.000000000 -0400
+++ linux-2.6.17-mm3equiv/include/linux/taskstats.h	2006-07-06 02:38:28.000000000 -0400
@@ -87,8 +87,6 @@ struct taskstats {
  };


-#define TASKSTATS_LISTEN_GROUP	0x1
-
  /*
   * Commands sent from userspace
   * Not versioned. New commands should only be inserted at the enum's end
@@ -120,6 +118,8 @@ enum {
  	TASKSTATS_CMD_ATTR_UNSPEC = 0,
  	TASKSTATS_CMD_ATTR_PID,
  	TASKSTATS_CMD_ATTR_TGID,
+	TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
+	TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK,
  	__TASKSTATS_CMD_ATTR_MAX,
  };

Index: linux-2.6.17-mm3equiv/include/linux/taskstats_kern.h
===================================================================
--- linux-2.6.17-mm3equiv.orig/include/linux/taskstats_kern.h	2006-06-30 11:57:14.000000000 -0400
+++ linux-2.6.17-mm3equiv/include/linux/taskstats_kern.h	2006-07-05 16:51:52.000000000 -0400
@@ -20,21 +20,6 @@ enum {
  extern kmem_cache_t *taskstats_cache;
  extern struct mutex taskstats_exit_mutex;

-static inline int taskstats_has_listeners(void)
-{
-	if (!genl_sock)
-		return 0;
-	return netlink_has_listeners(genl_sock, TASKSTATS_LISTEN_GROUP);
-}
-
-
-static inline void taskstats_exit_alloc(struct taskstats **ptidstats)
-{
-	*ptidstats = NULL;
-	if (taskstats_has_listeners())
-		*ptidstats = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL);
-}
-
  static inline void taskstats_exit_free(struct taskstats *tidstats)
  {
  	if (tidstats)
@@ -79,17 +64,18 @@ static inline void taskstats_tgid_free(s
  		kmem_cache_free(taskstats_cache, stats);
  }

-extern void taskstats_exit_send(struct task_struct *, struct taskstats *, int);
+extern void taskstats_exit_alloc(struct taskstats **, unsigned int *);
+extern void taskstats_exit_send(struct task_struct *, struct taskstats *, int, unsigned int);
  extern void taskstats_init_early(void);
  extern void taskstats_tgid_alloc(struct signal_struct *);
  #else
-static inline void taskstats_exit_alloc(struct taskstats **ptidstats)
+static inline void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu)
  {}
  static inline void taskstats_exit_free(struct taskstats *ptidstats)
  {}
  static inline void taskstats_exit_send(struct task_struct *tsk,
  				       struct taskstats *tidstats,
-				       int group_dead)
+				       int group_dead, unsigned int cpu)
  {}
  static inline void taskstats_tgid_init(struct signal_struct *sig)
  {}
Index: linux-2.6.17-mm3equiv/kernel/taskstats.c
===================================================================
--- linux-2.6.17-mm3equiv.orig/kernel/taskstats.c	2006-06-30 23:38:39.000000000 -0400
+++ linux-2.6.17-mm3equiv/kernel/taskstats.c	2006-07-06 02:41:15.000000000 -0400
@@ -19,9 +19,17 @@
  #include <linux/kernel.h>
  #include <linux/taskstats_kern.h>
  #include <linux/delayacct.h>
+#include <linux/cpumask.h>
+#include <linux/percpu.h>
  #include <net/genetlink.h>
  #include <asm/atomic.h>

+/*
+ * Maximum length of a cpumask that can be specified in
+ * the TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK attribute
+ */
+#define TASKSTATS_CPUMASK_MAXLEN	(100+6*NR_CPUS)
+
  static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 };
  static int family_registered = 0;
  kmem_cache_t *taskstats_cache;
@@ -37,9 +45,26 @@ static struct nla_policy taskstats_cmd_g
  __read_mostly = {
  	[TASKSTATS_CMD_ATTR_PID]  = { .type = NLA_U32 },
  	[TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 },
+	[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
+	[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};
+
+struct listener {
+	struct list_head list;
+	pid_t pid;
  };

+struct listener_list {
+	struct rw_semaphore sem;
+	struct list_head list;
+};
+static DEFINE_PER_CPU(struct listener_list, listener_array);

+enum actions {
+	REGISTER,
+	DEREGISTER,
+	CPU_DONT_CARE
+};
+
  static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
  			void **replyp, size_t size)
  {
@@ -74,9 +99,11 @@ static int prepare_reply(struct genl_inf
  	return 0;
  }

-static int send_reply(struct sk_buff *skb, pid_t pid, int event)
+static int send_reply(struct sk_buff *skb, pid_t pid, int event, unsigned int cpu)
  {
  	struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
+	struct listener_list *listeners;
+	struct list_head *p, *tmp;
  	void *reply;
  	int rc;

@@ -88,9 +115,29 @@ static int send_reply(struct sk_buff *sk
  		return rc;
  	}

-	if (event == TASKSTATS_MSG_MULTICAST)
-		return genlmsg_multicast(skb, pid, TASKSTATS_LISTEN_GROUP);
-	return genlmsg_unicast(skb, pid);
+	if (event == TASKSTATS_MSG_UNICAST)
+		return genlmsg_unicast(skb, pid);
+
+	/*
+	 * Taskstats multicast is unicasts to listeners who have registered
+	 * interest in cpu
+	 */
+
+	listeners = &per_cpu(listener_array, cpu);
+	down_write(&listeners->sem);
+	list_for_each_safe(p, tmp, &listeners->list) {
+		int ret;
+		struct listener *s = list_entry(p, struct listener, list);
+		ret = genlmsg_unicast(skb, s->pid);
+		if (ret) {
+			list_del(&s->list);
+			kfree(s);
+			rc = ret;
+		}
+	}
+	up_write(&listeners->sem);
+
+	return rc;
  }

  static int fill_pid(pid_t pid, struct task_struct *pidtsk,
@@ -201,8 +248,53 @@ ret:
  	return;
  }

+static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
+{
+	struct listener *s;
+	struct listener_list *listeners;
+	unsigned int cpu;
+	cpumask_t mask;
+	struct list_head *p;
+
+	memcpy(&mask, maskp, sizeof(cpumask_t));
+	if (!cpus_subset(mask, cpu_possible_map))
+		return -EINVAL;
+
+	if (isadd == REGISTER) {
+		for_each_cpu_mask(cpu, mask) {
+			s = kmalloc_node(sizeof(struct listener), GFP_KERNEL,
+					 cpu_to_node(cpu));
+			if (!s)
+				return -ENOMEM;
+			s->pid = pid;
+			INIT_LIST_HEAD(&s->list);
+
+			listeners = &per_cpu(listener_array, cpu);
+			down_write(&listeners->sem);
+			list_add(&s->list, &listeners->list);
+			up_write(&listeners->sem);
+		}
+	} else {
+		for_each_cpu_mask(cpu, mask) {
+			struct list_head *tmp;
+
+			listeners = &per_cpu(listener_array, cpu);
+			down_write(&listeners->sem);
+			list_for_each_safe(p, tmp, &listeners->list) {
+				s = list_entry(p, struct listener, list);
+				if (s->pid == pid) {
+					list_del(&s->list);
+					kfree(s);
+					break;
+				}
+			}
+			up_write(&listeners->sem);
+		}
+	}
+	return 0;
+}

-static int taskstats_send_stats(struct sk_buff *skb, struct genl_info *info)
+static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
  {
  	int rc = 0;
  	struct sk_buff *rep_skb;
@@ -210,6 +302,25 @@ static int taskstats_send_stats(struct s
  	void *reply;
  	size_t size;
  	struct nlattr *na;
+	cpumask_t mask;
+
+	if (info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK]) {
+		na = info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK];
+		if (nla_len(na) > TASKSTATS_CPUMASK_MAXLEN)
+			return -E2BIG;
+		cpulist_parse((char *)nla_data(na), mask);
+		rc = add_del_listener(info->snd_pid, &mask, REGISTER);
+		return rc;
+	}
+
+	if (info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK]) {
+		na = info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK];
+		if (nla_len(na) > TASKSTATS_CPUMASK_MAXLEN)
+			return -E2BIG;
+		cpulist_parse((char *)nla_data(na), mask);
+		rc = add_del_listener(info->snd_pid, &mask, DEREGISTER);
+		return rc;
+	}

  	/*
  	 * Size includes space for nested attributes
@@ -249,7 +360,8 @@ static int taskstats_send_stats(struct s

  	nla_nest_end(rep_skb, na);

-	return send_reply(rep_skb, info->snd_pid, TASKSTATS_MSG_UNICAST);
+	return send_reply(rep_skb, info->snd_pid, TASKSTATS_MSG_UNICAST,
+			  CPU_DONT_CARE);

  nla_put_failure:
  	return genlmsg_cancel(rep_skb, reply);
@@ -258,9 +370,36 @@ err:
  	return rc;
  }

+void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu)
+{
+	struct listener_list *listeners;
+	struct taskstats *tmp;
+	/*
+	 * This is the cpu on which the task is exiting currently and will
+	 * be the one for which the exit event is sent, even if the cpu
+	 * on which this function is running changes later.
+	 */
+	*mycpu = raw_smp_processor_id();
+
+	*ptidstats = NULL;
+	tmp = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL);
+	if (!tmp)
+		return;
+
+	listeners = &per_cpu(listener_array, *mycpu);
+	down_read(&listeners->sem);
+	if (!list_empty(&listeners->list)) {
+		*ptidstats = tmp;
+		tmp = NULL;
+	}
+	up_read(&listeners->sem);
+	if (tmp)
+		kfree(tmp);
+}
+
  /* Send pid data out on exit */
  void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
-			int group_dead)
+			int group_dead, unsigned int mycpu)
  {
  	int rc;
  	struct sk_buff *rep_skb;
@@ -320,7 +459,7 @@ void taskstats_exit_send(struct task_str
  	nla_nest_end(rep_skb, na);

  send:
-	send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST);
+	send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST, mycpu);
  	return;

  nla_put_failure:
@@ -334,7 +473,7 @@ ret:

  static struct genl_ops taskstats_ops = {
  	.cmd		= TASKSTATS_CMD_GET,
-	.doit		= taskstats_send_stats,
+	.doit		= taskstats_user_cmd,
  	.policy		= taskstats_cmd_get_policy,
  };

@@ -349,6 +488,7 @@ void __init taskstats_init_early(void)
  static int __init taskstats_init(void)
  {
  	int rc;
+	unsigned int i;

  	rc = genl_register_family(&family);
  	if (rc)
@@ -358,6 +498,11 @@ static int __init taskstats_init(void)
  	rc = genl_register_ops(&family, &taskstats_ops);
  	if (rc < 0)
  		goto err;
+
+	for_each_possible_cpu(i) {
+		INIT_LIST_HEAD(&(per_cpu(listener_array, i).list));
+		init_rwsem(&(per_cpu(listener_array, i).sem));
+	}

  	return 0;
  err:
Index: linux-2.6.17-mm3equiv/kernel/exit.c
===================================================================
--- linux-2.6.17-mm3equiv.orig/kernel/exit.c	2006-06-28 16:09:01.000000000 -0400
+++ linux-2.6.17-mm3equiv/kernel/exit.c	2006-07-05 16:22:50.000000000 -0400
@@ -852,6 +852,7 @@ fastcall NORET_TYPE void do_exit(long co
  	struct task_struct *tsk = current;
  	struct taskstats *tidstats;
  	int group_dead;
+	unsigned int mycpu;

  	profile_task_exit(tsk);

@@ -889,7 +890,7 @@ fastcall NORET_TYPE void do_exit(long co
  				current->comm, current->pid,
  				preempt_count());

-	taskstats_exit_alloc(&tidstats);
+	taskstats_exit_alloc(&tidstats, &mycpu);

  	acct_update_integrals(tsk);
  	if (tsk->mm) {
@@ -910,7 +911,7 @@ fastcall NORET_TYPE void do_exit(long co
  #endif
  	if (unlikely(tsk->audit_context))
  		audit_free(tsk);
-	taskstats_exit_send(tsk, tidstats, group_dead);
+	taskstats_exit_send(tsk, tidstats, group_dead, mycpu);
  	taskstats_exit_free(tidstats);
  	delayacct_tsk_exit(tsk);


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]
  2006-07-06  9:28 [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks] Shailabh Nagar
@ 2006-07-06  9:56 ` Andrew Morton
  2006-07-06 10:44   ` Shailabh Nagar
                     ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Andrew Morton @ 2006-07-06  9:56 UTC (permalink / raw)
  To: Shailabh Nagar
  Cc: jlan, pj, Valdis.Kletnieks, balbir, csturtiv, linux-kernel, hadi, netdev

On Thu, 06 Jul 2006 05:28:35 -0400
Shailabh Nagar <nagar@watson.ibm.com> wrote:

> On systems with a large number of cpus, with even a modest rate of
> tasks exiting per cpu, the volume of taskstats data sent on thread exit
> can overflow a userspace listener's buffers.
> 
> One approach to avoiding overflow is to allow listeners to get data for
> a limited and specific set of cpus. By scaling the number of listeners
> and/or the cpus they monitor, userspace can handle the statistical data
> overload more gracefully.
> 
> In this patch, each listener registers to listen to a specific set of
> cpus by specifying a cpumask.  The interest is recorded per-cpu. When
> a task exits on a cpu, its taskstats data is unicast to each listener
> interested in that cpu.
> 
> Thanks to Andrew Morton for pointing out the various scalability and
> general concerns of previous attempts and for suggesting this design.
> 
> ...
>
> --- linux-2.6.17-mm3equiv.orig/include/linux/taskstats.h	2006-06-30 19:03:40.000000000 -0400
> +++ linux-2.6.17-mm3equiv/include/linux/taskstats.h	2006-07-06 02:38:28.000000000 -0400

Your email client performs space-stuffing.  Fortunately "sed -e 's/^  / /'"
is easy.

>   #include <linux/taskstats_kern.h>
>   #include <linux/delayacct.h>
> +#include <linux/cpumask.h>
> +#include <linux/percpu.h>
>   #include <net/genetlink.h>
>   #include <asm/atomic.h>

Like that.

> 
> +static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
> +{
> +	struct listener *s;
> +	struct listener_list *listeners;
> +	unsigned int cpu;
> +	cpumask_t mask;
> +	struct list_head *p;
> +
> +	memcpy(&mask, maskp, sizeof(cpumask_t));

	mask = *maskp; ?

> +	if (!cpus_subset(mask, cpu_possible_map))
> +		return -EINVAL;
> +
> +	if (isadd == REGISTER) {
> +		for_each_cpu_mask(cpu, mask) {
> +			s = kmalloc_node(sizeof(struct listener), GFP_KERNEL,
> +					 cpu_to_node(cpu));
> +			if (!s)
> +				return -ENOMEM;
> +			s->pid = pid;
> +			INIT_LIST_HEAD(&s->list);
> +
> +			listeners = &per_cpu(listener_array, cpu);
> +			down_write(&listeners->sem);
> +			list_add(&s->list, &listeners->list);
> +			up_write(&listeners->sem);
> +		}
> +	} else {
> +		for_each_cpu_mask(cpu, mask) {
> +			struct list_head *tmp;
> +
> +			listeners = &per_cpu(listener_array, cpu);
> +			down_write(&listeners->sem);
> +			list_for_each_safe(p, tmp, &listeners->list) {
> +				s = list_entry(p, struct listener, list);
> +				if (s->pid == pid) {
> +					list_del(&s->list);
> +					kfree(s);
> +					break;
> +				}
> +			}
> +			up_write(&listeners->sem);
> +		}
> +	}
> +	return 0;
> +}

You might choose to handle the ENOMEM situation here by backing out and not
leaving things half-installed.  I suspect that's just a simple `goto'.

> -static int taskstats_send_stats(struct sk_buff *skb, struct genl_info *info)
> +static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
>   {
>   	int rc = 0;
>   	struct sk_buff *rep_skb;
> @@ -210,6 +302,25 @@ static int taskstats_send_stats(struct s
>   	void *reply;
>   	size_t size;
>   	struct nlattr *na;
> +	cpumask_t mask;

When counting add_del_listener(), that's two cpumasks on the stack.  How
big can these get?  256 bytes?  Is it possible to get by with just the one?

> +
> +	if (info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK]) {
> +		na = info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK];
> +		if (nla_len(na) > TASKSTATS_CPUMASK_MAXLEN)
> +			return -E2BIG;
> +		cpulist_parse((char *)nla_data(na), mask);

Best check the return value from this function.

> +		rc = add_del_listener(info->snd_pid, &mask, REGISTER);
> +		return rc;
> +	}
> +
> +	if (info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK]) {
> +		na = info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK];
> +		if (nla_len(na) > TASKSTATS_CPUMASK_MAXLEN)
> +			return -E2BIG;
> +		cpulist_parse((char *)nla_data(na), mask);
> +		rc = add_del_listener(info->snd_pid, &mask, DEREGISTER);
> +		return rc;
> +	}
> 
> ...
> 
> +void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu)
> +{
> +	struct listener_list *listeners;
> +	struct taskstats *tmp;
> +	/*
> +	 * This is the cpu on which the task is exiting currently and will
> +	 * be the one for which the exit event is sent, even if the cpu
> +	 * on which this function is running changes later.
> +	 */
> +	*mycpu = raw_smp_processor_id();
> +
> +	*ptidstats = NULL;
> +	tmp = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL);
> +	if (!tmp)
> +		return;
> +
> +	listeners = &per_cpu(listener_array, *mycpu);
> +	down_read(&listeners->sem);
> +	if (!list_empty(&listeners->list)) {
> +		*ptidstats = tmp;
> +		tmp = NULL;
> +	}
> +	up_read(&listeners->sem);
> +	if (tmp)
> +		kfree(tmp);

kfree(NULL) is legal.


Looks good to me.  Does it work?

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]
  2006-07-06  9:56 ` Andrew Morton
@ 2006-07-06 10:44   ` Shailabh Nagar
  2006-07-06 11:37   ` Shailabh Nagar
  2006-07-07  6:18   ` Paul Jackson
  2 siblings, 0 replies; 14+ messages in thread
From: Shailabh Nagar @ 2006-07-06 10:44 UTC (permalink / raw)
  To: Andrew Morton
  Cc: jlan, pj, Valdis.Kletnieks, balbir, csturtiv, linux-kernel, hadi, netdev

On Thu, 2006-07-06 at 02:56 -0700, Andrew Morton wrote:
> On Thu, 06 Jul 2006 05:28:35 -0400
> Shailabh Nagar <nagar@watson.ibm.com> wrote:
> 
> > On systems with a large number of cpus, with even a modest rate of
> > tasks exiting per cpu, the volume of taskstats data sent on thread exit
> > can overflow a userspace listener's buffers.
> > 
> > One approach to avoiding overflow is to allow listeners to get data for
> > a limited and specific set of cpus. By scaling the number of listeners
> > and/or the cpus they monitor, userspace can handle the statistical data
> > overload more gracefully.
> > 
> > In this patch, each listener registers to listen to a specific set of
> > cpus by specifying a cpumask.  The interest is recorded per-cpu. When
> > a task exits on a cpu, its taskstats data is unicast to each listener
> > interested in that cpu.
> > 
> > Thanks to Andrew Morton for pointing out the various scalability and
> > general concerns of previous attempts and for suggesting this design.
> > 
> > ...
> >
> > --- linux-2.6.17-mm3equiv.orig/include/linux/taskstats.h	2006-06-30 19:03:40.000000000 -0400
> > +++ linux-2.6.17-mm3equiv/include/linux/taskstats.h	2006-07-06 02:38:28.000000000 -0400
> 
> Your email client performs space-stuffing.  Fortunately "sed -e 's/^  / /'"
> is easy.
> 
> >   #include <linux/taskstats_kern.h>
> >   #include <linux/delayacct.h>
> > +#include <linux/cpumask.h>
> > +#include <linux/percpu.h>
> >   #include <net/genetlink.h>
> >   #include <asm/atomic.h>
> 
> Like that.

Sorry. First the text breaking and now patch mangling....I'll switch
mail clients.
> 
> > 
> > +static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
> > +{
> > +	struct listener *s;
> > +	struct listener_list *listeners;
> > +	unsigned int cpu;
> > +	cpumask_t mask;
> > +	struct list_head *p;
> > +
> > +	memcpy(&mask, maskp, sizeof(cpumask_t));
> 
> 	mask = *maskp; ?

Yes.

> 
> > +	if (!cpus_subset(mask, cpu_possible_map))
> > +		return -EINVAL;
> > +
> > +	if (isadd == REGISTER) {
> > +		for_each_cpu_mask(cpu, mask) {
> > +			s = kmalloc_node(sizeof(struct listener), GFP_KERNEL,
> > +					 cpu_to_node(cpu));
> > +			if (!s)
> > +				return -ENOMEM;
> > +			s->pid = pid;
> > +			INIT_LIST_HEAD(&s->list);
> > +
> > +			listeners = &per_cpu(listener_array, cpu);
> > +			down_write(&listeners->sem);
> > +			list_add(&s->list, &listeners->list);
> > +			up_write(&listeners->sem);
> > +		}
> > +	} else {
> > +		for_each_cpu_mask(cpu, mask) {
> > +			struct list_head *tmp;
> > +
> > +			listeners = &per_cpu(listener_array, cpu);
> > +			down_write(&listeners->sem);
> > +			list_for_each_safe(p, tmp, &listeners->list) {
> > +				s = list_entry(p, struct listener, list);
> > +				if (s->pid == pid) {
> > +					list_del(&s->list);
> > +					kfree(s);
> > +					break;
> > +				}
> > +			}
> > +			up_write(&listeners->sem);
> > +		}
> > +	}
> > +	return 0;
> > +}
> 
> You might choose to handle the ENOMEM situation here by backing out and not
> leaving things half-installed.  I suspect that's just a simple `goto'.

Will do. 

> 
> > -static int taskstats_send_stats(struct sk_buff *skb, struct genl_info *info)
> > +static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
> >   {
> >   	int rc = 0;
> >   	struct sk_buff *rep_skb;
> > @@ -210,6 +302,25 @@ static int taskstats_send_stats(struct s
> >   	void *reply;
> >   	size_t size;
> >   	struct nlattr *na;
> > +	cpumask_t mask;
> 
> When counting add_del_listener(), that's two cpumasks on the stack.  How
> big can these get?  256 bytes?  Is it possible to get by with just the one?

For 1024 cpus, 128 bytes. All the cpumask functions seem to need the
mask (though they internally use the address). Will try to see if I can
reduce.
> 
> > +
> > +	if (info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK]) {
> > +		na = info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK];
> > +		if (nla_len(na) > TASKSTATS_CPUMASK_MAXLEN)
> > +			return -E2BIG;
> > +		cpulist_parse((char *)nla_data(na), mask);
> 
> Best check the return value from this function.

Oops.

> 
> > +		rc = add_del_listener(info->snd_pid, &mask, REGISTER);
> > +		return rc;
> > +	}
> > +
> > +	if (info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK]) {
> > +		na = info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK];
> > +		if (nla_len(na) > TASKSTATS_CPUMASK_MAXLEN)
> > +			return -E2BIG;
> > +		cpulist_parse((char *)nla_data(na), mask);
> > +		rc = add_del_listener(info->snd_pid, &mask, DEREGISTER);
> > +		return rc;
> > +	}
> > 
> > ...
> > 
> > +void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu)
> > +{
> > +	struct listener_list *listeners;
> > +	struct taskstats *tmp;
> > +	/*
> > +	 * This is the cpu on which the task is exiting currently and will
> > +	 * be the one for which the exit event is sent, even if the cpu
> > +	 * on which this function is running changes later.
> > +	 */
> > +	*mycpu = raw_smp_processor_id();
> > +
> > +	*ptidstats = NULL;
> > +	tmp = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL);
> > +	if (!tmp)
> > +		return;
> > +
> > +	listeners = &per_cpu(listener_array, *mycpu);
> > +	down_read(&listeners->sem);
> > +	if (!list_empty(&listeners->list)) {
> > +		*ptidstats = tmp;
> > +		tmp = NULL;
> > +	}
> > +	up_read(&listeners->sem);
> > +	if (tmp)
> > +		kfree(tmp);
> 
> kfree(NULL) is legal.

Aha. Didn't remember that.
> 
> 
> Looks good to me.  Does it work?

Yes, though ts only been run on a single cpu so far but the 
blocking of sends without a registered listener,
registration/deregistration etc. is all working (confirmed by printing
out the registered list of pids etc.). Stability is also fine so far.

--Shailabh


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]
  2006-07-06  9:56 ` Andrew Morton
  2006-07-06 10:44   ` Shailabh Nagar
@ 2006-07-06 11:37   ` Shailabh Nagar
  2006-07-06 12:08     ` Thomas Graf
  2006-07-07  6:18   ` Paul Jackson
  2 siblings, 1 reply; 14+ messages in thread
From: Shailabh Nagar @ 2006-07-06 11:37 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Jay Lan, pj, Valdis.Kletnieks, Balbir Singh, csturtiv,
	linux-kernel, Jamal, netdev

On systems with a large number of cpus, with even a modest rate of 
tasks exiting per cpu, the volume of taskstats data sent on thread exit
can overflow a userspace listener's buffers.

One approach to avoiding overflow is to allow listeners to get data for
a limited and specific set of cpus. By scaling the number of listeners 
and/or the cpus they monitor, userspace can handle the statistical data
overload more gracefully.

In this patch, each listener registers to listen to a specific set of 
cpus by specifying a cpumask.  The interest is recorded per-cpu. When 
a task exits on a cpu, its taskstats data is unicast to each listener 
interested in that cpu.

Thanks to Andrew Morton for pointing out the various scalability and 
general concerns of previous attempts and for suggesting this design.

Signed-Off-By: Shailabh Nagar <nagar@watson.ibm.com>
---

Addresses various comments by akpm:
- fix your email client !
- avoid memcpy of cpumask
- handle ENOMEM within add_del_listener properly
- check return value of cpulist_parse
- exploit kfree(NULL)

Does not address (can't see an easy way to do)
- try to avoid allocating two cpumask's on stack from taskstats_user_cmd 

Tested on a 1-way and works ok. Testing on 2/4 way being done.


 include/linux/taskstats.h      |    4 
 include/linux/taskstats_kern.h |   22 -----
 kernel/exit.c                  |    5 -
 kernel/taskstats.c             |  168 ++++++++++++++++++++++++++++++++++++++---
 4 files changed, 168 insertions(+), 31 deletions(-)

Index: linux-2.6.17-mm3equiv/include/linux/taskstats.h
===================================================================
--- linux-2.6.17-mm3equiv.orig/include/linux/taskstats.h	2006-06-30 19:03:40.000000000 -0400
+++ linux-2.6.17-mm3equiv/include/linux/taskstats.h	2006-07-06 06:48:10.000000000 -0400
@@ -87,8 +87,6 @@ struct taskstats {
 };
 
 
-#define TASKSTATS_LISTEN_GROUP	0x1
-
 /*
  * Commands sent from userspace
  * Not versioned. New commands should only be inserted at the enum's end
@@ -120,6 +118,8 @@ enum {
 	TASKSTATS_CMD_ATTR_UNSPEC = 0,
 	TASKSTATS_CMD_ATTR_PID,
 	TASKSTATS_CMD_ATTR_TGID,
+	TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
+	TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK,
 	__TASKSTATS_CMD_ATTR_MAX,
 };
 
Index: linux-2.6.17-mm3equiv/include/linux/taskstats_kern.h
===================================================================
--- linux-2.6.17-mm3equiv.orig/include/linux/taskstats_kern.h	2006-06-30 11:57:14.000000000 -0400
+++ linux-2.6.17-mm3equiv/include/linux/taskstats_kern.h	2006-07-06 06:48:10.000000000 -0400
@@ -20,21 +20,6 @@ enum {
 extern kmem_cache_t *taskstats_cache;
 extern struct mutex taskstats_exit_mutex;
 
-static inline int taskstats_has_listeners(void)
-{
-	if (!genl_sock)
-		return 0;
-	return netlink_has_listeners(genl_sock, TASKSTATS_LISTEN_GROUP);
-}
-
-
-static inline void taskstats_exit_alloc(struct taskstats **ptidstats)
-{
-	*ptidstats = NULL;
-	if (taskstats_has_listeners())
-		*ptidstats = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL);
-}
-
 static inline void taskstats_exit_free(struct taskstats *tidstats)
 {
 	if (tidstats)
@@ -79,17 +64,18 @@ static inline void taskstats_tgid_free(s
 		kmem_cache_free(taskstats_cache, stats);
 }
 
-extern void taskstats_exit_send(struct task_struct *, struct taskstats *, int);
+extern void taskstats_exit_alloc(struct taskstats **, unsigned int *);
+extern void taskstats_exit_send(struct task_struct *, struct taskstats *, int, unsigned int);
 extern void taskstats_init_early(void);
 extern void taskstats_tgid_alloc(struct signal_struct *);
 #else
-static inline void taskstats_exit_alloc(struct taskstats **ptidstats)
+static inline void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu)
 {}
 static inline void taskstats_exit_free(struct taskstats *ptidstats)
 {}
 static inline void taskstats_exit_send(struct task_struct *tsk,
 				       struct taskstats *tidstats,
-				       int group_dead)
+				       int group_dead, unsigned int cpu)
 {}
 static inline void taskstats_tgid_init(struct signal_struct *sig)
 {}
Index: linux-2.6.17-mm3equiv/kernel/taskstats.c
===================================================================
--- linux-2.6.17-mm3equiv.orig/kernel/taskstats.c	2006-06-30 23:38:39.000000000 -0400
+++ linux-2.6.17-mm3equiv/kernel/taskstats.c	2006-07-06 07:02:54.000000000 -0400
@@ -19,9 +19,17 @@
 #include <linux/kernel.h>
 #include <linux/taskstats_kern.h>
 #include <linux/delayacct.h>
+#include <linux/cpumask.h>
+#include <linux/percpu.h>
 #include <net/genetlink.h>
 #include <asm/atomic.h>
 
+/*
+ * Maximum length of a cpumask that can be specified in
+ * the TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK attribute
+ */
+#define TASKSTATS_CPUMASK_MAXLEN	(100+6*NR_CPUS)
+
 static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 };
 static int family_registered = 0;
 kmem_cache_t *taskstats_cache;
@@ -37,9 +45,26 @@ static struct nla_policy taskstats_cmd_g
 __read_mostly = {
 	[TASKSTATS_CMD_ATTR_PID]  = { .type = NLA_U32 },
 	[TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 },
+	[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
+	[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};
+
+struct listener {
+	struct list_head list;
+	pid_t pid;
 };
 
+struct listener_list {
+	struct rw_semaphore sem;
+	struct list_head list;
+};
+static DEFINE_PER_CPU(struct listener_list, listener_array);
 
+enum actions {
+	REGISTER,
+	DEREGISTER,
+	CPU_DONT_CARE
+};
+
 static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
 			void **replyp, size_t size)
 {
@@ -74,9 +99,11 @@ static int prepare_reply(struct genl_inf
 	return 0;
 }
 
-static int send_reply(struct sk_buff *skb, pid_t pid, int event)
+static int send_reply(struct sk_buff *skb, pid_t pid, int event, unsigned int cpu)
 {
 	struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
+	struct listener_list *listeners;
+	struct list_head *p, *tmp;
 	void *reply;
 	int rc;
 
@@ -88,9 +115,29 @@ static int send_reply(struct sk_buff *sk
 		return rc;
 	}
 
-	if (event == TASKSTATS_MSG_MULTICAST)
-		return genlmsg_multicast(skb, pid, TASKSTATS_LISTEN_GROUP);
-	return genlmsg_unicast(skb, pid);
+	if (event == TASKSTATS_MSG_UNICAST)
+		return genlmsg_unicast(skb, pid);
+
+	/*
+	 * Taskstats multicast is unicasts to listeners who have registered
+	 * interest in cpu
+	 */
+
+	listeners = &per_cpu(listener_array, cpu);
+	down_write(&listeners->sem);
+	list_for_each_safe(p, tmp, &listeners->list) {
+		int ret;
+		struct listener *s = list_entry(p, struct listener, list);
+		ret = genlmsg_unicast(skb, s->pid);
+		if (ret) {
+			list_del(&s->list);
+			kfree(s);
+			rc = ret;
+		}
+	}
+	up_write(&listeners->sem);
+
+	return rc;
 }
 
 static int fill_pid(pid_t pid, struct task_struct *pidtsk,
@@ -201,8 +248,55 @@ ret:
 	return;
 }
 
+static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
+{
+	struct listener *s;
+	struct listener_list *listeners;
+	unsigned int cpu;
+	cpumask_t mask = *maskp;
+	struct list_head *p;
+
+	if (!cpus_subset(mask, cpu_possible_map))
+		return -EINVAL;
+
+	if (isadd == REGISTER) {
+		for_each_cpu_mask(cpu, mask) {
+			s = kmalloc_node(sizeof(struct listener), GFP_KERNEL,
+					 cpu_to_node(cpu));
+			if (!s)
+				goto cleanup;
+			s->pid = pid;
+			INIT_LIST_HEAD(&s->list);
+
+			listeners = &per_cpu(listener_array, cpu);
+			down_write(&listeners->sem);
+			list_add(&s->list, &listeners->list);
+			up_write(&listeners->sem);
+		}
+		return 0;
+	}
+
+	/* Deregister or cleanup */
+cleanup:
+	for_each_cpu_mask(cpu, mask) {
+		struct list_head *tmp;
+
+		listeners = &per_cpu(listener_array, cpu);
+		down_write(&listeners->sem);
+		list_for_each_safe(p, tmp, &listeners->list) {
+			s = list_entry(p, struct listener, list);
+			if (s->pid == pid) {
+				list_del(&s->list);
+				kfree(s);
+				break;
+			}
+		}
+		up_write(&listeners->sem);
+	}
+	return 0;
+}
 
-static int taskstats_send_stats(struct sk_buff *skb, struct genl_info *info)
+static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 {
 	int rc = 0;
 	struct sk_buff *rep_skb;
@@ -210,6 +304,29 @@ static int taskstats_send_stats(struct s
 	void *reply;
 	size_t size;
 	struct nlattr *na;
+	cpumask_t mask;
+
+	if (info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK]) {
+		na = info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK];
+		if (nla_len(na) > TASKSTATS_CPUMASK_MAXLEN)
+			return -E2BIG;
+		rc = cpulist_parse((char *)nla_data(na), mask);
+		if (rc)
+			return rc;
+		rc = add_del_listener(info->snd_pid, &mask, REGISTER);
+		return rc;
+	}
+
+	if (info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK]) {
+		na = info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK];
+		if (nla_len(na) > TASKSTATS_CPUMASK_MAXLEN)
+			return -E2BIG;
+		rc = cpulist_parse((char *)nla_data(na), mask);
+		if (rc)
+			return rc;
+		rc = add_del_listener(info->snd_pid, &mask, DEREGISTER);
+		return rc;
+	}
 
 	/*
 	 * Size includes space for nested attributes
@@ -249,7 +366,8 @@ static int taskstats_send_stats(struct s
 
 	nla_nest_end(rep_skb, na);
 
-	return send_reply(rep_skb, info->snd_pid, TASKSTATS_MSG_UNICAST);
+	return send_reply(rep_skb, info->snd_pid, TASKSTATS_MSG_UNICAST,
+			  CPU_DONT_CARE);
 
 nla_put_failure:
 	return genlmsg_cancel(rep_skb, reply);
@@ -258,9 +376,35 @@ err:
 	return rc;
 }
 
+void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu)
+{
+	struct listener_list *listeners;
+	struct taskstats *tmp;
+	/*
+	 * This is the cpu on which the task is exiting currently and will
+	 * be the one for which the exit event is sent, even if the cpu
+	 * on which this function is running changes later.
+	 */
+	*mycpu = raw_smp_processor_id();
+
+	*ptidstats = NULL;
+	tmp = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL);
+	if (!tmp)
+		return;
+
+	listeners = &per_cpu(listener_array, *mycpu);
+	down_read(&listeners->sem);
+	if (!list_empty(&listeners->list)) {
+		*ptidstats = tmp;
+		tmp = NULL;
+	}
+	up_read(&listeners->sem);
+	kfree(tmp);
+}
+
 /* Send pid data out on exit */
 void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
-			int group_dead)
+			int group_dead, unsigned int mycpu)
 {
 	int rc;
 	struct sk_buff *rep_skb;
@@ -320,7 +464,7 @@ void taskstats_exit_send(struct task_str
 	nla_nest_end(rep_skb, na);
 
 send:
-	send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST);
+	send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST, mycpu);
 	return;
 
 nla_put_failure:
@@ -334,7 +478,7 @@ ret:
 
 static struct genl_ops taskstats_ops = {
 	.cmd		= TASKSTATS_CMD_GET,
-	.doit		= taskstats_send_stats,
+	.doit		= taskstats_user_cmd,
 	.policy		= taskstats_cmd_get_policy,
 };
 
@@ -349,6 +493,7 @@ void __init taskstats_init_early(void)
 static int __init taskstats_init(void)
 {
 	int rc;
+	unsigned int i;
 
 	rc = genl_register_family(&family);
 	if (rc)
@@ -358,6 +503,11 @@ static int __init taskstats_init(void)
 	rc = genl_register_ops(&family, &taskstats_ops);
 	if (rc < 0)
 		goto err;
+
+	for_each_possible_cpu(i) {
+		INIT_LIST_HEAD(&(per_cpu(listener_array, i).list));
+		init_rwsem(&(per_cpu(listener_array, i).sem));
+	}
 
 	return 0;
 err:
Index: linux-2.6.17-mm3equiv/kernel/exit.c
===================================================================
--- linux-2.6.17-mm3equiv.orig/kernel/exit.c	2006-06-28 16:09:01.000000000 -0400
+++ linux-2.6.17-mm3equiv/kernel/exit.c	2006-07-05 16:22:50.000000000 -0400
@@ -852,6 +852,7 @@ fastcall NORET_TYPE void do_exit(long co
 	struct task_struct *tsk = current;
 	struct taskstats *tidstats;
 	int group_dead;
+	unsigned int mycpu;
 
 	profile_task_exit(tsk);
 
@@ -889,7 +890,7 @@ fastcall NORET_TYPE void do_exit(long co
 				current->comm, current->pid,
 				preempt_count());
 
-	taskstats_exit_alloc(&tidstats);
+	taskstats_exit_alloc(&tidstats, &mycpu);
 
 	acct_update_integrals(tsk);
 	if (tsk->mm) {
@@ -910,7 +911,7 @@ fastcall NORET_TYPE void do_exit(long co
 #endif
 	if (unlikely(tsk->audit_context))
 		audit_free(tsk);
-	taskstats_exit_send(tsk, tidstats, group_dead);
+	taskstats_exit_send(tsk, tidstats, group_dead, mycpu);
 	taskstats_exit_free(tidstats);
 	delayacct_tsk_exit(tsk);
 



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]
  2006-07-06 11:37   ` Shailabh Nagar
@ 2006-07-06 12:08     ` Thomas Graf
  2006-07-06 13:21       ` Shailabh Nagar
  2006-07-06 21:48       ` Andrew Morton
  0 siblings, 2 replies; 14+ messages in thread
From: Thomas Graf @ 2006-07-06 12:08 UTC (permalink / raw)
  To: Shailabh Nagar
  Cc: Andrew Morton, Jay Lan, pj, Valdis.Kletnieks, Balbir Singh,
	csturtiv, linux-kernel, Jamal, netdev

* Shailabh Nagar <nagar@watson.ibm.com> 2006-07-06 07:37
> @@ -37,9 +45,26 @@ static struct nla_policy taskstats_cmd_g
>  __read_mostly = {
>  	[TASKSTATS_CMD_ATTR_PID]  = { .type = NLA_U32 },
>  	[TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 },
> +	[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
> +	[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};

> +		na = info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK];
> +		if (nla_len(na) > TASKSTATS_CPUMASK_MAXLEN)
> +			return -E2BIG;
> +		rc = cpulist_parse((char *)nla_data(na), mask);

This isn't safe, the data in the attribute is not guaranteed to be
NUL terminated. Still it's probably me to blame for not making
this more obvious in the API.

I've attached a patch below extending the API to make it easier
for interfaces using NUL termianted strings, so you'd do:

[TASKSTATS_CMS_ATTR_REGISTER_CPUMASK] = { .type = NLA_NUL_STRING,
                                          .len = TASKSTATS_CPUMASK_MAXLEN }

... and then use (char *) nla_data(str_attr) without any further
sanity checks.

[NETLINK]: Improve string attribute validation

Introduces a new attribute type NLA_NUL_STRING to support NUL
terminated strings. Attributes of this kind require to carry
a terminating NUL within the maximum specified in the policy.

The `old' NLA_STRING which is not required to be NUL terminated
is extended to provide means to specify a maximum length of the
string.

Aims at easing the pain with using nla_strlcpy() on temporary
buffers.

The old `minlen' field is renamed to `len' for cosmetic purposes
which is ok since nobody was using it at this point.

Signed-off-by: Thomas Graf <tgraf@suug.ch>

Index: net-2.6.git/include/net/netlink.h
===================================================================
--- net-2.6.git.orig/include/net/netlink.h
+++ net-2.6.git/include/net/netlink.h
@@ -158,6 +158,7 @@ enum {
 	NLA_FLAG,
 	NLA_MSECS,
 	NLA_NESTED,
+	NLA_NUL_STRING,
 	__NLA_TYPE_MAX,
 };
 
@@ -166,21 +167,27 @@ enum {
 /**
  * struct nla_policy - attribute validation policy
  * @type: Type of attribute or NLA_UNSPEC
- * @minlen: Minimal length of payload required to be available
+ * @len: Type specific length of payload
  *
  * Policies are defined as arrays of this struct, the array must be
  * accessible by attribute type up to the highest identifier to be expected.
  *
+ * Meaning of `len' field:
+ *    NLA_STRING           Maximum length of string
+ *    NLA_NUL_STRING       Maximum length of string including NUL
+ *    NLA_FLAG             Unused
+ *    All other            Exact length of attribute payload
+ *
  * Example:
  * static struct nla_policy my_policy[ATTR_MAX+1] __read_mostly = {
  * 	[ATTR_FOO] = { .type = NLA_U16 },
- *	[ATTR_BAR] = { .type = NLA_STRING },
- *	[ATTR_BAZ] = { .minlen = sizeof(struct mystruct) },
+ *	[ATTR_BAR] = { .type = NLA_STRING, len = BARSIZ },
+ *	[ATTR_BAZ] = { .len = sizeof(struct mystruct) },
  * };
  */
 struct nla_policy {
 	u16		type;
-	u16		minlen;
+	u16		len;
 };
 
 extern void		netlink_run_queue(struct sock *sk, unsigned int *qlen,
Index: net-2.6.git/net/netlink/attr.c
===================================================================
--- net-2.6.git.orig/net/netlink/attr.c
+++ net-2.6.git/net/netlink/attr.c
@@ -20,7 +20,6 @@ static u16 nla_attr_minlen[NLA_TYPE_MAX+
 	[NLA_U16]	= sizeof(u16),
 	[NLA_U32]	= sizeof(u32),
 	[NLA_U64]	= sizeof(u64),
-	[NLA_STRING]	= 1,
 	[NLA_NESTED]	= NLA_HDRLEN,
 };
 
@@ -28,7 +27,7 @@ static int validate_nla(struct nlattr *n
 			struct nla_policy *policy)
 {
 	struct nla_policy *pt;
-	int minlen = 0;
+	int minlen = 0, attrlen = nla_len(nla);
 
 	if (nla->nla_type <= 0 || nla->nla_type > maxtype)
 		return 0;
@@ -37,16 +36,33 @@ static int validate_nla(struct nlattr *n
 
 	BUG_ON(pt->type > NLA_TYPE_MAX);
 
-	if (pt->minlen)
-		minlen = pt->minlen;
-	else if (pt->type != NLA_UNSPEC)
-		minlen = nla_attr_minlen[pt->type];
+	switch (pt->type) {
+	case NLA_FLAG:
+		if (attrlen > 0)
+			return -ERANGE;
+		break;
+
+	case NLA_NUL_STRING:
+		minlen = min_t(int, attrlen, pt->len);
+
+		if (!minlen || strnchr(nla_data(nla), minlen, '\0') == NULL)
+			return -EINVAL;
+		/* fall through */
+
+	case NLA_STRING:
+		if (attrlen < 1 || attrlen > pt->len)
+			return -ERANGE;
+		break;
+
+	default:
+		if (pt->len)
+			minlen = pt->len;
+		else if (pt->type != NLA_UNSPEC)
+			minlen = nla_attr_minlen[pt->type];
 
-	if (pt->type == NLA_FLAG && nla_len(nla) > 0)
-		return -ERANGE;
-
-	if (nla_len(nla) < minlen)
-		return -ERANGE;
+		if (attrlen < minlen)
+			return -ERANGE;
+	}
 
 	return 0;
 }

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]
  2006-07-06 12:08     ` Thomas Graf
@ 2006-07-06 13:21       ` Shailabh Nagar
  2006-07-06 21:48       ` Andrew Morton
  1 sibling, 0 replies; 14+ messages in thread
From: Shailabh Nagar @ 2006-07-06 13:21 UTC (permalink / raw)
  To: Thomas Graf
  Cc: Andrew Morton, Jay Lan, pj, Valdis.Kletnieks, Balbir Singh,
	csturtiv, linux-kernel, Jamal, netdev

On Thu, 2006-07-06 at 14:08 +0200, Thomas Graf wrote:
> * Shailabh Nagar <nagar@watson.ibm.com> 2006-07-06 07:37
> > @@ -37,9 +45,26 @@ static struct nla_policy taskstats_cmd_g
> >  __read_mostly = {
> >  	[TASKSTATS_CMD_ATTR_PID]  = { .type = NLA_U32 },
> >  	[TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 },
> > +	[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
> > +	[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};
> 
> > +		na = info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK];
> > +		if (nla_len(na) > TASKSTATS_CPUMASK_MAXLEN)
> > +			return -E2BIG;
> > +		rc = cpulist_parse((char *)nla_data(na), mask);
> 
> This isn't safe, the data in the attribute is not guaranteed to be
> NUL terminated. Still it's probably me to blame for not making
> this more obvious in the API.
> 
> I've attached a patch below extending the API to make it easier
> for interfaces using NUL termianted strings, so you'd do:
> 
> [TASKSTATS_CMS_ATTR_REGISTER_CPUMASK] = { .type = NLA_NUL_STRING,
>                                           .len = TASKSTATS_CPUMASK_MAXLEN }
> 
> ... and then use (char *) nla_data(str_attr) without any further
> sanity checks.


Thanks. That makes it much clearer. I was looking around for a "maxlen"
attribute helper yesterday :-)

--Shailabh


> 
> [NETLINK]: Improve string attribute validation
> 
> Introduces a new attribute type NLA_NUL_STRING to support NUL
> terminated strings. Attributes of this kind require to carry
> a terminating NUL within the maximum specified in the policy.
> 
> The `old' NLA_STRING which is not required to be NUL terminated
> is extended to provide means to specify a maximum length of the
> string.
> 
> Aims at easing the pain with using nla_strlcpy() on temporary
> buffers.
> 
> The old `minlen' field is renamed to `len' for cosmetic purposes
> which is ok since nobody was using it at this point.
> 
> Signed-off-by: Thomas Graf <tgraf@suug.ch>

<snip>


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]
  2006-07-06 12:08     ` Thomas Graf
  2006-07-06 13:21       ` Shailabh Nagar
@ 2006-07-06 21:48       ` Andrew Morton
  2006-07-06 22:27         ` Shailabh Nagar
  2006-07-06 22:40         ` Thomas Graf
  1 sibling, 2 replies; 14+ messages in thread
From: Andrew Morton @ 2006-07-06 21:48 UTC (permalink / raw)
  To: Thomas Graf
  Cc: nagar, jlan, pj, Valdis.Kletnieks, balbir, csturtiv,
	linux-kernel, hadi, netdev

Thomas Graf <tgraf@suug.ch> wrote:
>
> * Shailabh Nagar <nagar@watson.ibm.com> 2006-07-06 07:37
> > @@ -37,9 +45,26 @@ static struct nla_policy taskstats_cmd_g
> >  __read_mostly = {
> >  	[TASKSTATS_CMD_ATTR_PID]  = { .type = NLA_U32 },
> >  	[TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 },
> > +	[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
> > +	[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};
> 
> > +		na = info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK];
> > +		if (nla_len(na) > TASKSTATS_CPUMASK_MAXLEN)
> > +			return -E2BIG;
> > +		rc = cpulist_parse((char *)nla_data(na), mask);
> 
> This isn't safe, the data in the attribute is not guaranteed to be
> NUL terminated. Still it's probably me to blame for not making
> this more obvious in the API.
> 

Thanks, that was an unpleasant bug.

> I've attached a patch below extending the API to make it easier
> for interfaces using NUL termianted strings,

In the interests of keeping this work decoupled from netlink enhancements
I'd propose the below.  Is it bad to modify the data at nla_data()?


--- a/kernel/taskstats.c~per-task-delay-accounting-taskstats-interface-control-exit-data-through-cpumasks-fix
+++ a/kernel/taskstats.c
@@ -299,6 +299,23 @@ cleanup:
 	return 0;
 }
 
+static int parse(struct nlattr *na, cpumask_t *mask)
+{
+	char *data;
+	int len;
+
+	if (na == NULL)
+		return 1;
+	len = nla_len(na);
+	if (len > TASKSTATS_CPUMASK_MAXLEN)
+		return -E2BIG;
+	if (len < 1)
+		return -EINVAL;
+	data = nla_data(na);
+	data[len - 1] = '\0';
+	return cpulist_parse(data, *mask);
+}
+
 static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 {
 	int rc = 0;
@@ -309,27 +326,17 @@ static int taskstats_user_cmd(struct sk_
 	struct nlattr *na;
 	cpumask_t mask;
 
-	if (info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK]) {
-		na = info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK];
-		if (nla_len(na) > TASKSTATS_CPUMASK_MAXLEN)
-			return -E2BIG;
-		rc = cpulist_parse((char *)nla_data(na), mask);
-		if (rc)
-			return rc;
-		rc = add_del_listener(info->snd_pid, &mask, REGISTER);
+	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask);
+	if (rc < 0)
 		return rc;
-	}
+	if (rc == 0)
+		return add_del_listener(info->snd_pid, &mask, REGISTER);
 
-	if (info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK]) {
-		na = info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK];
-		if (nla_len(na) > TASKSTATS_CPUMASK_MAXLEN)
-			return -E2BIG;
-		rc = cpulist_parse((char *)nla_data(na), mask);
-		if (rc)
-			return rc;
-		rc = add_del_listener(info->snd_pid, &mask, DEREGISTER);
+	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], &mask);
+	if (rc < 0)
 		return rc;
-	}
+	if (rc == 0)
+		return add_del_listener(info->snd_pid, &mask, DEREGISTER);
 
 	/*
 	 * Size includes space for nested attributes
_


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]
  2006-07-06 21:48       ` Andrew Morton
@ 2006-07-06 22:27         ` Shailabh Nagar
  2006-07-06 22:56           ` Andrew Morton
  2006-07-06 22:40         ` Thomas Graf
  1 sibling, 1 reply; 14+ messages in thread
From: Shailabh Nagar @ 2006-07-06 22:27 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Thomas Graf, jlan, pj, Valdis.Kletnieks, balbir, csturtiv,
	linux-kernel, hadi, netdev

Andrew Morton wrote:
> Thomas Graf <tgraf@suug.ch> wrote:
> 
>>* Shailabh Nagar <nagar@watson.ibm.com> 2006-07-06 07:37
>>
>>>@@ -37,9 +45,26 @@ static struct nla_policy taskstats_cmd_g
>>> __read_mostly = {
>>> 	[TASKSTATS_CMD_ATTR_PID]  = { .type = NLA_U32 },
>>> 	[TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 },
>>>+	[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
>>>+	[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};
>>
>>>+		na = info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK];
>>>+		if (nla_len(na) > TASKSTATS_CPUMASK_MAXLEN)
>>>+			return -E2BIG;
>>>+		rc = cpulist_parse((char *)nla_data(na), mask);
>>
>>This isn't safe, the data in the attribute is not guaranteed to be
>>NUL terminated. Still it's probably me to blame for not making
>>this more obvious in the API.
>>
> 
> 
> Thanks, that was an unpleasant bug.
> 
> 
>>I've attached a patch below extending the API to make it easier
>>for interfaces using NUL termianted strings,
> 
> 
> In the interests of keeping this work decoupled from netlink enhancements
> I'd propose the below.  

The patch looks good. I was also thinking of simply modifying the input
to have the null termination and modify later when netlink provides
generic support.


> Is it bad to modify the data at nla_data()?
> 
> 
> --- a/kernel/taskstats.c~per-task-delay-accounting-taskstats-interface-control-exit-data-through-cpumasks-fix
> +++ a/kernel/taskstats.c
> @@ -299,6 +299,23 @@ cleanup:
>  	return 0;
>  }
>  
> +static int parse(struct nlattr *na, cpumask_t *mask)
> +{
> +	char *data;
> +	int len;
> +
> +	if (na == NULL)
> +		return 1;
> +	len = nla_len(na);
> +	if (len > TASKSTATS_CPUMASK_MAXLEN)
> +		return -E2BIG;
> +	if (len < 1)
> +		return -EINVAL;
> +	data = nla_data(na);
> +	data[len - 1] = '\0';
> +	return cpulist_parse(data, *mask);
> +}
> +
>  static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
>  {
>  	int rc = 0;
> @@ -309,27 +326,17 @@ static int taskstats_user_cmd(struct sk_
>  	struct nlattr *na;
>  	cpumask_t mask;
>  
> -	if (info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK]) {
> -		na = info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK];
> -		if (nla_len(na) > TASKSTATS_CPUMASK_MAXLEN)
> -			return -E2BIG;
> -		rc = cpulist_parse((char *)nla_data(na), mask);
> -		if (rc)
> -			return rc;
> -		rc = add_del_listener(info->snd_pid, &mask, REGISTER);
> +	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask);
> +	if (rc < 0)
>  		return rc;
> -	}
> +	if (rc == 0)
> +		return add_del_listener(info->snd_pid, &mask, REGISTER);
>  
> -	if (info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK]) {
> -		na = info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK];
> -		if (nla_len(na) > TASKSTATS_CPUMASK_MAXLEN)
> -			return -E2BIG;
> -		rc = cpulist_parse((char *)nla_data(na), mask);
> -		if (rc)
> -			return rc;
> -		rc = add_del_listener(info->snd_pid, &mask, DEREGISTER);
> +	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], &mask);
> +	if (rc < 0)
>  		return rc;
> -	}
> +	if (rc == 0)
> +		return add_del_listener(info->snd_pid, &mask, DEREGISTER);
>  
>  	/*
>  	 * Size includes space for nested attributes
> _
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]
  2006-07-06 21:48       ` Andrew Morton
  2006-07-06 22:27         ` Shailabh Nagar
@ 2006-07-06 22:40         ` Thomas Graf
  2006-07-06 23:05           ` Andrew Morton
  1 sibling, 1 reply; 14+ messages in thread
From: Thomas Graf @ 2006-07-06 22:40 UTC (permalink / raw)
  To: Andrew Morton
  Cc: nagar, jlan, pj, Valdis.Kletnieks, balbir, csturtiv,
	linux-kernel, hadi, netdev

* Andrew Morton <akpm@osdl.org> 2006-07-06 14:48
> In the interests of keeping this work decoupled from netlink enhancements
> I'd propose the below.  Is it bad to modify the data at nla_data()?

Yes, it points into a skb data buffer which may be shared by sitting
on other queues if the message is to be broadcasted. In this case it
would be harmless but the policy is to leave it unmodified. Otherwise
I agree it's better to move forward and not wait for the API change.

> --- a/kernel/taskstats.c~per-task-delay-accounting-taskstats-interface-control-exit-data-through-cpumasks-fix
> +++ a/kernel/taskstats.c
> @@ -299,6 +299,23 @@ cleanup:
>  	return 0;
>  }
>  
> +static int parse(struct nlattr *na, cpumask_t *mask)
> +{
> +	char *data;
> +	int len;
> +
> +	if (na == NULL)
> +		return 1;
> +	len = nla_len(na);
> +	if (len > TASKSTATS_CPUMASK_MAXLEN)
> +		return -E2BIG;
> +	if (len < 1)
> +		return -EINVAL;
> +	data = nla_data(na);
> +	data[len - 1] = '\0';
> +	return cpulist_parse(data, *mask);
> +}

Usually this is done by using strlcpy:

Example fro genetlink.c
if (info->attrs[CTRL_ATTR_FAMILY_NAME]) {
	char name[GENL_NAMSIZ];

	if (nla_strlcpy(name, info->attrs[CTRL_ATTR_FAMILY_NAME],
			GENL_NAMSIZ) >= GENL_NAMSIZ)
		goto errout;

	res = genl_family_find_byname(name);
}

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]
  2006-07-06 22:27         ` Shailabh Nagar
@ 2006-07-06 22:56           ` Andrew Morton
  2006-07-07 10:21             ` Thomas Graf
  0 siblings, 1 reply; 14+ messages in thread
From: Andrew Morton @ 2006-07-06 22:56 UTC (permalink / raw)
  To: Shailabh Nagar
  Cc: tgraf, jlan, pj, Valdis.Kletnieks, balbir, csturtiv,
	linux-kernel, hadi, netdev

Shailabh Nagar <nagar@watson.ibm.com> wrote:
>
> Andrew Morton wrote:
> > Thomas Graf <tgraf@suug.ch> wrote:
> > 
> >>* Shailabh Nagar <nagar@watson.ibm.com> 2006-07-06 07:37
> >>
> >>>@@ -37,9 +45,26 @@ static struct nla_policy taskstats_cmd_g
> >>> __read_mostly = {
> >>> 	[TASKSTATS_CMD_ATTR_PID]  = { .type = NLA_U32 },
> >>> 	[TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 },
> >>>+	[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
> >>>+	[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};
> >>
> >>>+		na = info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK];
> >>>+		if (nla_len(na) > TASKSTATS_CPUMASK_MAXLEN)
> >>>+			return -E2BIG;
> >>>+		rc = cpulist_parse((char *)nla_data(na), mask);
> >>
> >>This isn't safe, the data in the attribute is not guaranteed to be
> >>NUL terminated. Still it's probably me to blame for not making
> >>this more obvious in the API.
> >>
> > 
> > 
> > Thanks, that was an unpleasant bug.
> > 
> > 
> >>I've attached a patch below extending the API to make it easier
> >>for interfaces using NUL termianted strings,
> > 
> > 
> > In the interests of keeping this work decoupled from netlink enhancements
> > I'd propose the below.  
> 
> The patch looks good. I was also thinking of simply modifying the input
> to have the null termination and modify later when netlink provides
> generic support.
> 
> 

Yup.  Thomas, what's the testing status of the netlink patch you sent?  Should I
queue it up and start plagueing people with it?


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]
  2006-07-06 22:40         ` Thomas Graf
@ 2006-07-06 23:05           ` Andrew Morton
  2006-07-07 10:16             ` Thomas Graf
  0 siblings, 1 reply; 14+ messages in thread
From: Andrew Morton @ 2006-07-06 23:05 UTC (permalink / raw)
  To: Thomas Graf
  Cc: nagar, jlan, pj, Valdis.Kletnieks, balbir, csturtiv,
	linux-kernel, hadi, netdev

Thomas Graf <tgraf@suug.ch> wrote:
>
> * Andrew Morton <akpm@osdl.org> 2006-07-06 14:48
> > In the interests of keeping this work decoupled from netlink enhancements
> > I'd propose the below.  Is it bad to modify the data at nla_data()?
> 
> Yes, it points into a skb data buffer which may be shared by sitting
> on other queues if the message is to be broadcasted. In this case it
> would be harmless but the policy is to leave it unmodified.

Yup, sleazy-but-safe.

> 
> > --- a/kernel/taskstats.c~per-task-delay-accounting-taskstats-interface-control-exit-data-through-cpumasks-fix
> > +++ a/kernel/taskstats.c
> > @@ -299,6 +299,23 @@ cleanup:
> >  	return 0;
> >  }
> >  
> > +static int parse(struct nlattr *na, cpumask_t *mask)
> > +{
> > +	char *data;
> > +	int len;
> > +
> > +	if (na == NULL)
> > +		return 1;
> > +	len = nla_len(na);
> > +	if (len > TASKSTATS_CPUMASK_MAXLEN)
> > +		return -E2BIG;
> > +	if (len < 1)
> > +		return -EINVAL;
> > +	data = nla_data(na);
> > +	data[len - 1] = '\0';
> > +	return cpulist_parse(data, *mask);
> > +}
> 
> Usually this is done by using strlcpy:

hm, nla_strlcpy() looks more complex than it needs to be.  We really need
nla_kstrndup() ;)

Oh well.  This?


diff -puN kernel/taskstats.c~per-task-delay-accounting-taskstats-interface-control-exit-data-through-cpumasks-fix kernel/taskstats.c
--- a/kernel/taskstats.c~per-task-delay-accounting-taskstats-interface-control-exit-data-through-cpumasks-fix
+++ a/kernel/taskstats.c
@@ -299,6 +299,28 @@ cleanup:
 	return 0;
 }
 
+static int parse(struct nlattr *na, cpumask_t *mask)
+{
+	char *data;
+	int len;
+	int ret;
+
+	if (na == NULL)
+		return 1;
+	len = nla_len(na);
+	if (len > TASKSTATS_CPUMASK_MAXLEN)
+		return -E2BIG;
+	if (len < 1)
+		return -EINVAL;
+	data = kmalloc(len, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+	nla_strlcpy(data, na, len);
+	ret = cpulist_parse(data, *mask);
+	kfree(data);
+	return ret;
+}
+
 static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 {
 	int rc = 0;
@@ -309,27 +331,17 @@ static int taskstats_user_cmd(struct sk_
 	struct nlattr *na;
 	cpumask_t mask;
 
-	if (info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK]) {
-		na = info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK];
-		if (nla_len(na) > TASKSTATS_CPUMASK_MAXLEN)
-			return -E2BIG;
-		rc = cpulist_parse((char *)nla_data(na), mask);
-		if (rc)
-			return rc;
-		rc = add_del_listener(info->snd_pid, &mask, REGISTER);
+	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask);
+	if (rc < 0)
 		return rc;
-	}
+	if (rc == 0)
+		return add_del_listener(info->snd_pid, &mask, REGISTER);
 
-	if (info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK]) {
-		na = info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK];
-		if (nla_len(na) > TASKSTATS_CPUMASK_MAXLEN)
-			return -E2BIG;
-		rc = cpulist_parse((char *)nla_data(na), mask);
-		if (rc)
-			return rc;
-		rc = add_del_listener(info->snd_pid, &mask, DEREGISTER);
+	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], &mask);
+	if (rc < 0)
 		return rc;
-	}
+	if (rc == 0)
+		return add_del_listener(info->snd_pid, &mask, DEREGISTER);
 
 	/*
 	 * Size includes space for nested attributes
_


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]
  2006-07-06  9:56 ` Andrew Morton
  2006-07-06 10:44   ` Shailabh Nagar
  2006-07-06 11:37   ` Shailabh Nagar
@ 2006-07-07  6:18   ` Paul Jackson
  2 siblings, 0 replies; 14+ messages in thread
From: Paul Jackson @ 2006-07-07  6:18 UTC (permalink / raw)
  To: Andrew Morton
  Cc: nagar, jlan, Valdis.Kletnieks, balbir, csturtiv, linux-kernel,
	hadi, netdev

Andrew wrote:
> Your email client performs space-stuffing.

Shailabh,

Some of us find that it is easier and more reliable to use a special
purpose script to send patches, rather than trying to do so via our
email client.  Even though my email client, Sylpheed, probably sends
patches just fine, I enjoy the convenience of preparing the patches
and mailing instructions in my editor, before sending them off with
such a utility.

One possible such utility is 'sendpatchset', which I maintain:

  http://www.speakeasy.org/~pj99/sgi/sendpatchset

It's a script, with the documentation embedded as the help message.

Especially when sending off multiple patches as a set, it provides
more reliable results than trying to prepare and send multiple such
simultaneous messages from the typical email client.

-- 
                  I won't rest till it's the best ...
                  Programmer, Linux Scalability
                  Paul Jackson <pj@sgi.com> 1.925.600.0401

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]
  2006-07-06 23:05           ` Andrew Morton
@ 2006-07-07 10:16             ` Thomas Graf
  0 siblings, 0 replies; 14+ messages in thread
From: Thomas Graf @ 2006-07-07 10:16 UTC (permalink / raw)
  To: Andrew Morton
  Cc: nagar, jlan, pj, Valdis.Kletnieks, balbir, csturtiv,
	linux-kernel, hadi, netdev

* Andrew Morton <akpm@osdl.org> 2006-07-06 16:05
> hm, nla_strlcpy() looks more complex than it needs to be.  We really need
> nla_kstrndup() ;)
> 
> Oh well.  This?

Looks good.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]
  2006-07-06 22:56           ` Andrew Morton
@ 2006-07-07 10:21             ` Thomas Graf
  0 siblings, 0 replies; 14+ messages in thread
From: Thomas Graf @ 2006-07-07 10:21 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Shailabh Nagar, jlan, pj, Valdis.Kletnieks, balbir, csturtiv,
	linux-kernel, hadi, netdev

* Andrew Morton <akpm@osdl.org> 2006-07-06 15:56
> Yup.  Thomas, what's the testing status of the netlink patch you sent?  Should I
> queue it up and start plagueing people with it?

It survived feeding it with oversized strings etc. Feel free
to queue it up.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2006-07-07 10:21 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-07-06  9:28 [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks] Shailabh Nagar
2006-07-06  9:56 ` Andrew Morton
2006-07-06 10:44   ` Shailabh Nagar
2006-07-06 11:37   ` Shailabh Nagar
2006-07-06 12:08     ` Thomas Graf
2006-07-06 13:21       ` Shailabh Nagar
2006-07-06 21:48       ` Andrew Morton
2006-07-06 22:27         ` Shailabh Nagar
2006-07-06 22:56           ` Andrew Morton
2006-07-07 10:21             ` Thomas Graf
2006-07-06 22:40         ` Thomas Graf
2006-07-06 23:05           ` Andrew Morton
2006-07-07 10:16             ` Thomas Graf
2006-07-07  6:18   ` Paul Jackson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.